Commit 77e97d5a authored by Sikhin VC's avatar Sikhin VC

added accuracy improvement logic

parent 0b24b6da
Pipeline #49677 failed with stage
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*pyc
# C extensions
#*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
.idea/
\ No newline at end of file
# Pipeline stages, listed in the order they are declared to run.
stages:
- auto-tagging
- build
- deploy
- update
variables:
# Base mysql client command reused by the scripts below; host, user and password come from CI variables.
MYSQL_CONNECTION: "mysql -h $MYSQL_HOST -u $MYSQL_USER -p$MYSQL_PASS "
# STATUS_SCRIPT: /home/gitlab-runner/monitor/deployment-status.sh
# HELM_CHART: /home/gitlab-runner/kubernetes/ilens/$QA_ENV/ilens-modules
# VARIABLES_YML: variables.yml
# DEPLOYMENT_YML: metadata-services.yml
TIMEOUT: 960s
# Global before_script: make sure a version row exists for this project in
# $VERSION_DB.$DB_TABLE, then load its qa/dev/uat/prod columns into shell variables.
before_script:
# Count the existing rows for this project (category 'customer-implementations', type 'AI', os 'docker').
- val=`echo $($MYSQL_CONNECTION -e "SELECT COUNT(*) FROM $VERSION_DB.$DB_TABLE WHERE category='customer-implementations' AND type='AI' AND os='docker' AND module_name='$CI_PROJECT_NAME' ") | cut -d " " -f2`
# Insert a row with four '0' version values when none exists yet.
- if [ $val == 0 ]; then $MYSQL_CONNECTION -e "INSERT INTO $VERSION_DB.$DB_TABLE values('customer-implementations','AI','$CI_PROJECT_NAME','docker', '0', '0', '0', '0')";fi
- QA=$($MYSQL_CONNECTION -N -e "SELECT qa FROM $VERSION_DB.$DB_TABLE where module_name = '$CI_PROJECT_NAME' AND type = 'AI' AND category = 'customer-implementations' AND os = 'docker'")
- DEV=$($MYSQL_CONNECTION -N -e "SELECT dev FROM $VERSION_DB.$DB_TABLE where module_name = '$CI_PROJECT_NAME' AND type = 'AI' AND category = 'customer-implementations' AND os = 'docker'")
# NOTE(review): this line spells the mysql command out instead of using $MYSQL_CONNECTION like its siblings.
- UAT=$(mysql -h $MYSQL_HOST -u $MYSQL_USER -p$MYSQL_PASS -N -e "SELECT uat FROM $VERSION_DB.$DB_TABLE where module_name = '$CI_PROJECT_NAME' AND type = 'AI' AND category = 'customer-implementations' AND os = 'docker'")
- PROD=$($MYSQL_CONNECTION -N -e "SELECT prod FROM $VERSION_DB.$DB_TABLE where module_name = '$CI_PROJECT_NAME' AND type = 'AI' AND category = 'customer-implementations' AND os = 'docker'")
# Job: compute the next version from the merged branch name, build and push the
# Docker image, create and push a git tag, and record the version in MySQL.
auto-tagging:
stage: auto-tagging
# Job-level before_script: make sure a row exists for this project in
# $VERSION_DB.$VERSION_RELEASE_TABLE, then read its four version columns.
before_script:
- val=`echo $($MYSQL_CONNECTION -e "SELECT COUNT(*) FROM $VERSION_DB.$VERSION_RELEASE_TABLE WHERE module_name='$CI_PROJECT_NAME' ") | cut -d " " -f2`
- if [ $val == 0 ]; then $MYSQL_CONNECTION -N -e "INSERT INTO $VERSION_DB.$VERSION_RELEASE_TABLE values('$CI_PROJECT_NAME', 'iLens', '0', '0', '0', '0')";fi
- ILENS=$($MYSQL_CONNECTION -N -e "SELECT ilens_version FROM "$VERSION_DB.$VERSION_RELEASE_TABLE" where module_name = '$CI_PROJECT_NAME'")
- RELEASE=$($MYSQL_CONNECTION -N -e "SELECT release_version FROM "$VERSION_DB.$VERSION_RELEASE_TABLE" where module_name = '$CI_PROJECT_NAME'")
- FEATURE=$($MYSQL_CONNECTION -N -e "SELECT feature_version FROM "$VERSION_DB.$VERSION_RELEASE_TABLE" where module_name = '$CI_PROJECT_NAME'")
- PATCH=$($MYSQL_CONNECTION -N -e "SELECT patch_version FROM "$VERSION_DB.$VERSION_RELEASE_TABLE" where module_name = '$CI_PROJECT_NAME'")
# Script outline:
#   1. SOURCE_BRANCH is cut out of the commit title: third space-separated word,
#      the part before "/", then the part after a single quote.
#   2. "QA" increments RELEASE, resets FEATURE and PATCH, and updates the prod/qa/dev columns;
#      "feature" increments FEATURE and resets PATCH; "patch" increments PATCH;
#      any other value exits with status 1.
#   3. Only the "patch" branch puts PATCH into TAG_NAME.
# NOTE(review): the "QA" test quotes $SOURCE_BRANCH and uses "=", the other two tests
# leave it unquoted and use "==" - confirm this works when the variable is empty.
script:
- SOURCE_BRANCH=$(echo $CI_COMMIT_TITLE | cut -f 3 -d " " | cut -f 1 -d "/" | cut -f 2 -d "'")
- >
if [ "$SOURCE_BRANCH" = "QA" ]; then
((RELEASE=RELEASE+1)) && FEATURE=0 && PATCH=0;
TAG_NAME=v$RELEASE.$FEATURE
IMAGE_URL=azracrilensai.azurecr.io/repository/ilens-ai/customer-implementations/acc/$CI_PROJECT_NAME:"$TAG_NAME"
PROD=$RELEASE; QA=0; DEV=0;
$MYSQL_CONNECTION -e "UPDATE $VERSION_DB.$DB_TABLE SET prod='$PROD' ,qa='$QA', dev='$DEV' WHERE module_name='$CI_PROJECT_NAME' AND type='AI' AND category='customer-implementations' AND os='docker'"
elif [ $SOURCE_BRANCH == "feature" ]; then
((FEATURE=FEATURE+1)) && PATCH=0;
TAG_NAME=v$RELEASE.$FEATURE
IMAGE_URL=azracrilensai.azurecr.io/repository/ilens-ai/customer-implementations/acc/$CI_PROJECT_NAME:"$TAG_NAME"
elif [ $SOURCE_BRANCH == "patch" ]; then
((PATCH=PATCH+1));
TAG_NAME=v$RELEASE.$FEATURE.$PATCH
IMAGE_URL=azracrilensai.azurecr.io/repository/ilens-ai/customer-implementations/acc/$CI_PROJECT_NAME:"$TAG_NAME"
else
exit 1
fi
- echo -e "\n\nImage:" $IMAGE_URL >> ReleaseNote.txt
- sed -i "1s|^|Version":" $TAG_NAME\n|" ReleaseNote.txt
- sed -i "1s|^|Module Name":" $CI_PROJECT_NAME\n|" ReleaseNote.txt
# Build and push the image, then remove the local copy from the runner.
- docker build -t $IMAGE_URL .
- docker push $IMAGE_URL
- docker rmi --force $IMAGE_URL
# Point origin at an authenticated URL so the tag can be pushed; ReleaseNote.txt is the tag message.
- URL=$(echo $CI_PROJECT_URL | sed 's|https://||')
- git remote set-url origin https://$GIT_USRNAME:$GIT_USRPASSWD@$URL
- git config user.email "devopsilens@gmail.com"
- git config user.name "$GIT_USRNAME"
- git tag -a $TAG_NAME -F ReleaseNote.txt
- git push origin $TAG_NAME
# Persist the new counters and append a row to the history table.
- $MYSQL_CONNECTION -e "UPDATE $VERSION_DB.$VERSION_RELEASE_TABLE SET release_version='$RELEASE', feature_version='$FEATURE', patch_version='$PATCH' WHERE module_name = '$CI_PROJECT_NAME' "
# NOTE(review): the history version is $ILENS.$RELEASE.$FEATURE and omits $PATCH - confirm that is intended.
- $MYSQL_CONNECTION -e "INSERT INTO $HISTORY_DB.$VERSION_RELEASE_TABLE values('$CI_JOB_ID', '$CI_PROJECT_NAME','iLens', '$ILENS.$RELEASE.$FEATURE', '$CI_COMMIT_SHA', '$GITLAB_USER_NAME', '$CI_COMMIT_REF_NAME')"
# Runs on runners tagged "shell", for the master branch only.
tags:
- shell
only:
- master
#~~~~~| CODE QUALITY |~~~~~#
# Job: run sonar-scanner over the repository and then run the code-quality report script.
codequality:
stage: deploy
image: azacrknowledgelens.azurecr.io/knowledgelens/klit-operation/devops/gitlab-runner:ubuntu-sonarscanner
script:
# NOTE(review): the Sonar login and password are passed as literal "admin"/"admin" on the
# command line here and again to the report script below - consider CI variables instead.
- /opt/sonar-scanner/bin/sonar-scanner -Dsonar.projectKey=$CI_PROJECT_NAME -Dsonar.projectName=$CI_PROJECT_NAME -Dsonar.typescript.node=./node/node -Dsonar.login=admin -Dsonar.password=admin -Dsonar.sources=.
# Fixed 5 second pause between the scan and the report step.
- sleep 5
- python3 /opt/code_quality_report/static_code_quality_report_csv_v2.py $CI_PROJECT_NAME $GITLAB_USER_EMAIL,$EMAIL_TO $EMAIL_FROM $EMAIL_PASSWD False admin admin
# Runs on runners tagged "docker", for the develop branch only.
only:
- develop
tags:
- docker
File added
import os
from fastapi import FastAPI
from fastapi import File
import cv2
import numpy as np
import io
import uvicorn
from datetime import datetime
# Application bootstrap: set the `config` environment variable, import the edge
# engine, and build the module-level publisher, model and FastAPI objects.
ROOT_DIR = os.path.abspath(os.curdir)
# NOTE(review): this unconditionally overwrites any `config` value supplied by the
# environment with a hard-coded Mongo URI and deployment id - confirm this is not a
# debugging leftover. It must stay ABOVE the edge_engine imports below, because
# EDGE_CONFIG is built from this variable when the config module is imported
# (presumably edge_engine.common.config reads os.environ['config'] at import time).
os.environ["config"]="{\"TZ\": \"Asia/Kolkata\", \"MONGO_URI\": \"mongodb://192.168.0.220:2717/admin\", \"MONGO_DATABASE\": \"ilens_ai\", \"MONGO_COLLECTION\": \"janusDeployment\", \"MONGO_KEY\": \"deploymentId\", \"MONGO_VALUE\": \"ACCCementCamera20_61275102\", \"MONGO_COLL\": \"serviceConfiguration\", \"MONGO_DB\": \"ilens_ai\"}"
from edge_engine.edge_processor import ExecutePipeline
from edge_engine.edge_processor import Pubs
from scripts import CementBagCounter
from edge_engine.common.config import EDGE_CONFIG
# One shared set of publishers and one shared model instance serve every request.
pubs = Pubs()
mod = CementBagCounter(config=EDGE_CONFIG,
model_config=EDGE_CONFIG["modelConfig"],
pubs=pubs,
device_id=EDGE_CONFIG['deviceId'])
app = FastAPI()
@app.post("/")
def extract_text_diectly(file: bytes = File(...)):
    """Run the model pipeline on one uploaded image and return its JSON result.

    The upload is decoded with OpenCV, a copy is archived under ``archive/``
    with a timestamped name, and the decoded image is passed through
    ``ExecutePipeline`` using the module-level ``mod`` instance.

    :param file: raw bytes of the uploaded image.
    :return: ``mod.text_json`` after the pipeline has run. On any failure the
        exception is printed and ``None`` is returned (best-effort behaviour
        kept from the original implementation).
    """
    try:
        # Decode straight from the uploaded bytes; no intermediate stream needed.
        encoded = np.frombuffer(file, dtype="uint8")
        image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
        if image is None:
            # imdecode signals undecodable data by returning None; stop here
            # instead of handing None to imwrite and the model.
            raise ValueError("uploaded file could not be decoded as an image")

        # The archive directory is not guaranteed to exist on a fresh deployment.
        os.makedirs("archive", exist_ok=True)
        # Keep the upload bytes in `file`; build the archive path in its own name.
        archive_path = "archive/sample_" + str(datetime.now()) + ".jpg"
        for unwanted in (" ", "-", ":"):
            archive_path = archive_path.replace(unwanted, "_")
        cv2.imwrite(archive_path, image)

        ex = ExecutePipeline(mod, image)
        ex.run_model()
        return mod.text_json
    except Exception as e:
        print(e)
if __name__ == '__main__':
    # Serve the FastAPI app defined in this module when it is run as a script.
    uvicorn.run(
        "app:app",
        port=8290,
        host="localhost",
    )
# Load the OpenVINO environment into the current shell, then launch app.py
# (presumably the FastAPI service - confirm against the deployment layout).
source /opt/intel/openvino/bin/setupvars.sh
python3 app.py
\ No newline at end of file
from edge_engine.ai.model.modelwraper import ModelWrapper
from abc import ABC, abstractmethod
class ModelWrapper(ABC):
    """Template for model wrappers: pre-process, predict, then post-process.

    Subclasses must implement ``_predict``; the two processing hooks default
    to returning their input unchanged.
    """

    def __init__(self, path=None):
        """Hook for loading a model from ``path``; the base class loads nothing."""

    def _pre_process(self, x):
        """Turn raw input into what ``_predict`` expects (identity by default)."""
        return x

    def _post_process(self, x):
        """Turn the raw prediction into the final result (identity by default)."""
        return x

    @abstractmethod
    def _predict(self, x):
        """Run the core inference on pre-processed input."""

    def predict(self, x):
        """Run the full pre-process -> predict -> post-process chain on ``x``."""
        return self._post_process(self._predict(self._pre_process(x)))
# import the necessary packages
import cv2
import numpy as np
class GammaPreprocessor:
    """Apply gamma correction to images through a precomputed lookup table."""

    def __init__(self, gamma=1.0):
        """Build the 256-entry lookup table for the given ``gamma``."""
        self.invGamma = 1.0 / gamma
        intensity_levels = np.arange(0, 256)
        # Normalise each level to [0, 1], apply the inverse-gamma power, rescale.
        corrected_levels = [
            ((level / 255.0) ** self.invGamma) * 255
            for level in intensity_levels
        ]
        self.table = np.array(corrected_levels).astype("uint8")

    def preprocess(self, image):
        """Return ``image`` mapped through the lookup table."""
        lookup = self.table
        return cv2.LUT(image, lookup)
# import the necessary packages
from keras.preprocessing.image import img_to_array
class ImageToArrayPreprocessor:
    """Convert images to arrays with Keras' ``img_to_array``."""

    def __init__(self, dataFormat=None):
        """Remember the data format that is forwarded to ``img_to_array``."""
        self.dataFormat = dataFormat

    def preprocess(self, image):
        """Return ``image`` converted by ``img_to_array`` using the stored format."""
        layout = self.dataFormat
        return img_to_array(image, data_format=layout)
# import the necessary packages
import cv2
class SimpleHistogramPreprocessor:
    """Equalise an image's histogram with ``cv2.equalizeHist``."""

    def __init__(self):
        """Nothing to configure."""

    def preprocess(self, image):
        """Return the histogram-equalised version of ``image``."""
        equalised = cv2.equalizeHist(image)
        return equalised
# import the necessary packages
import cv2
class SimplePreprocessor:
    """Resize images to a fixed size without preserving the aspect ratio."""

    def __init__(self, width, height, inter=cv2.INTER_AREA):
        """Store the target width and height and the interpolation method."""
        self.width, self.height, self.inter = width, height, inter

    def preprocess(self, image):
        """Return ``image`` resized to ``(width, height)``."""
        target_size = (self.width, self.height)
        return cv2.resize(image, target_size, interpolation=self.inter)
import os
import sys
from edge_engine.common.constants import LicenseModule
from dateutil import parser
from datetime import datetime
from pymongo import MongoClient
from copy import deepcopy
import json
def licence_validator(payload):
    """Check whether a licence payload is still valid.

    :param payload: mapping expected to carry a ``'valid_till'`` date string.
    :return: ``True`` when ``valid_till`` lies in the future; ``False`` when
        the licence has expired or the payload cannot be read (missing key,
        non-mapping payload, or a value that cannot be parsed as a date).
    """
    try:
        # Read the field first so a missing key is reported before any parsing.
        valid_till = payload['valid_till']
        expiry = parser.parse(valid_till)
        # Use the expiry's own tzinfo (None for naive values) so that aware and
        # naive datetimes are never compared with each other.
        now = datetime.now(expiry.tzinfo)
    except (KeyError, TypeError, ValueError, OverflowError):
        sys.stderr.write("Error loading licence")
        return False
    if now > expiry:
        sys.stdout.write("Licence Expired \n")
        sys.stdout.flush()
        return False
    return True
def get_config_from_mongo(mongo_uri, dbname, basecollection,
                          key, value):
    """Fetch one configuration document from MongoDB.

    :param mongo_uri: connection URI handed to ``MongoClient``.
    :param dbname: database name.
    :param basecollection: collection to search.
    :param key: field name to match.
    :param value: value the field must equal.
    :return: the first matching document without its ``_id`` field, or
        ``None`` when nothing matches.
    """
    mongo = MongoClient(mongo_uri)
    try:
        return mongo[dbname][basecollection].find_one({key: value}, {"_id": False})
    finally:
        # The client is only needed for this single lookup; release its sockets.
        mongo.close()
def load_conf(config, mongo_uri, dbname):
    """Expand the publisher entries of ``config`` with their connection documents.

    Entries in ``config['pubConfigs']`` whose ``type`` is ``mqtt`` or ``mongo``
    (case-insensitive) are replaced by the connection document found in the
    collection named by the entry, updated with the entry's own fields. All
    other entries are kept unchanged.

    :param config: configuration dict; its ``'pubConfigs'`` list is replaced.
    :param mongo_uri: connection URI handed to ``MongoClient``.
    :param dbname: database holding the connection collections.
    :return: the same ``config`` object with ``'pubConfigs'`` rewritten.
    :raises ValueError: when a referenced connection document does not exist.
    """
    mongo = MongoClient(mongo_uri)
    try:
        db = mongo[dbname]
        pub_configs = []
        for conf in config['pubConfigs']:
            if conf["type"].lower() not in ("mqtt", "mongo"):
                pub_configs.append(conf)
                continue
            key = conf["key"]
            value = conf["value"]
            # "conectionCollection" (sic) is the key this code reads; keep the
            # spelling unless the stored documents are migrated as well.
            collection = conf["conectionCollection"]
            pub_conf = db[collection].find_one({key: value}, {"_id": False})
            if pub_conf is None:
                # Name the failing lookup rather than failing later on None.
                raise ValueError(
                    "no connection document in '{}' where {} == {!r}".format(
                        collection, key, value))
            pub_conf.update(conf)
            pub_configs.append(pub_conf)
    finally:
        mongo.close()
    config['pubConfigs'] = pub_configs
    return config
# """
# {
# "MONGO_URI": "mongodb://192.168.3.220:21017",
# "MONGO_DATABASE": "ilens_thermal_app",
# "MONGO_COLLECTION": "janus_deployment_details",
# "MONGO_KEY": "deploymentId",
# "MONGO_VALUE": "ddd"
# }
# """
# Module-level configuration: logging settings come from environment variables,
# the edge configuration is loaded from MongoDB using the JSON in the `config`
# environment variable. Any unusable configuration terminates the process.
LOG_LEVEL = os.environ.get("LOG_LEVEL", default="INFO").upper()
LOG_HANDLER_NAME = os.environ.get("LOG_HANDLER_NAME", default="ilens-edge_engine")
BASE_LOG_PATH = os.environ.get('BASE_LOG_PATH',
                               default=os.path.join(os.getcwd(), "logs"))
# makedirs also creates missing parent directories of a custom log path.
os.makedirs(BASE_LOG_PATH, exist_ok=True)

CONFIG_ENV = os.environ.get('config', default=None)
if CONFIG_ENV is None:
    # json.loads(None) would raise a bare TypeError; fail like the other checks.
    sys.stderr.write("missing 'config' environment variable \n")
    sys.exit(1)
CONFIG_ENV = json.loads(CONFIG_ENV)
# NOTE(review): this echoes the whole config, including the Mongo URI, to stdout.
sys.stdout.write("config->{} \n".format(json.dumps(CONFIG_ENV)))

MONGO_URI = CONFIG_ENV.get('MONGO_URI', None)
MONGO_DATABASE = CONFIG_ENV.get('MONGO_DATABASE', None)
MONGO_COLLECTION = CONFIG_ENV.get('MONGO_COLLECTION', None)
MONGO_KEY = CONFIG_ENV.get('MONGO_KEY', None)
MONGO_VALUE = CONFIG_ENV.get('MONGO_VALUE', None)
if MONGO_URI is None \
        or MONGO_DATABASE is None \
        or MONGO_COLLECTION is None \
        or MONGO_KEY is None \
        or MONGO_VALUE is None:
    sys.stderr.write("invalid mongo config \n")
    sys.exit(1)

EDGE_CONFIG = get_config_from_mongo(
    mongo_uri=MONGO_URI,
    dbname=MONGO_DATABASE, basecollection=MONGO_COLLECTION,
    key=MONGO_KEY, value=MONGO_VALUE
)
# Validate before the first subscript: a missing document comes back as None.
if EDGE_CONFIG is None:
    sys.stderr.write("invalid EDGE_CONFIG config \n")
    sys.exit(1)
DEVICE_ID = EDGE_CONFIG["deviceId"]

EDGE_CONFIG = load_conf(EDGE_CONFIG, mongo_uri=MONGO_URI,
                        dbname=MONGO_DATABASE)
DATA_PATH = EDGE_CONFIG["inputConf"].get('dataPath', os.path.join(os.getcwd(), "data"))
sys.stderr.write("Loading data from {} \n".format(DATA_PATH))
\ No newline at end of file
import os
import sys
import configparser
import json
from scripts.common.constants import LicenseModule
import base64
import jwt
from dateutil import parser
from datetime import datetime
import yaml
from copy import deepcopy
def licence_validator(payload):
    """Check whether a licence payload is still valid.

    :param payload: mapping expected to carry a ``'valid_till'`` date string.
    :return: ``True`` when ``valid_till`` lies in the future; ``False`` when
        the licence has expired or the payload cannot be read (missing key,
        non-mapping payload, or a value that cannot be parsed as a date).
    """
    try:
        # Read the field first so a missing key is reported before any parsing.
        valid_till = payload['valid_till']
        expiry = parser.parse(valid_till)
        # Use the expiry's own tzinfo (None for naive values) so that aware and
        # naive datetimes are never compared with each other.
        now = datetime.now(expiry.tzinfo)
    except (KeyError, TypeError, ValueError, OverflowError):
        sys.stderr.write("Error loading licence")
        return False
    if now > expiry:
        sys.stdout.write("Licence Expired \n")
        sys.stdout.flush()
        return False
    return True
# Base settings. Each value is taken from an environment variable when set,
# otherwise from the settings file, otherwise from the hard-coded fallback.
CONFIGURATION_FILE = os.environ.get("CONF_PATH", os.path.join(os.getcwd(), "conf{0}settings.conf".format(os.sep)))
sys.stdout.write("Reading Config from {} \n".format(CONFIGURATION_FILE))
sys.stdout.flush()
__config = configparser.ConfigParser()
__config.read(CONFIGURATION_FILE)

LOG_LEVEL = os.environ.get("LOG_LEVEL", __config.get('LOGGER', 'loglevel', fallback="DEBUG")).upper()
# NOTE(review): the handler name is read from the 'loglevel' option, the same
# option as LOG_LEVEL above - looks like a copy/paste slip; confirm the option name.
LOG_HANDLER_NAME = os.environ.get("LOG_HANDLER_NAME", __config.get('LOGGER', 'loglevel', fallback="face-id"))
BASE_LOG_PATH = os.environ.get('BASE_LOG_PATH',
                               __config.get('LOGGER', 'basepath', fallback=os.path.join(os.getcwd(), "logs")))
# makedirs also creates missing parent directories of a custom log path.
os.makedirs(BASE_LOG_PATH, exist_ok=True)

DATA_PATH = os.environ.get('BASE_DATA_PATH', __config.get('CLIENT-CONFIG', 'basepath', fallback=os.path.normpath(
    os.getcwd() + '{}data'.format(os.sep))))
sys.stdout.write("Data base path {} \n".format(DATA_PATH))
DEVICE_ID = os.environ.get("DEVICE_ID", __config.get('CLIENT-CONFIG', 'deviceid', fallback="<undefined>"))
LICENCE_FILE = os.environ.get("LICENCE_FILE", __config.get('CLIENT-CONFIG', 'licence-life', fallback="license.lic"))
LICENCE_FILE = os.path.join(DATA_PATH, LICENCE_FILE)
# NOTE(review): the option name 'mask_model=config-path' contains '=', which is
# unusual for a settings-file key - confirm the intended name.
MODEL_CONFIG_FILE = os.environ.get("MODEL_CONFIG_FILE",
                                   __config.get('CLIENT-CONFIG', 'mask_model=config-path', fallback="mask_model-config.yaml"))
MODEL_CONFIG_FILE = os.path.join(DATA_PATH, MODEL_CONFIG_FILE)

# Define the name up front: a YAML error is reported but not fatal, and
# importers of MODEL_CONFIG must still find it.
MODEL_CONFIG = None
with open(MODEL_CONFIG_FILE, 'r') as stream:
    try:
        MODEL_CONFIG = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        sys.stderr.write(" **Error loading mask_model config ** \n")
        sys.stderr.flush()
        # sys.exit(1)

# "".split(",") yields [""], so blank entries are dropped; otherwise the
# emptiness check below could never fire.
PUBLISHERS = [
    entry for entry in
    os.environ.get("PUBLISHERS", __config.get('CLIENT-CONFIG', 'publishers', fallback="")).split(",")
    if entry.strip()
]
SUBSCRIBER = os.environ.get("SUBSCRIBER", __config.get('CLIENT-CONFIG', 'subscriber', fallback=""))
if not PUBLISHERS:
    sys.stdout.write("Empty publishers \n")
CLIENT_ID = os.environ.get("CLIENT_ID", __config.get('CLIENT-CONFIG', 'client-id', fallback="test-client1"))
STREAM_TYPE = os.environ.get("STREAM_TYPE", __config.get('STREAM-CONFIG', 'stream-type', fallback="usbcam"))

# Templates that the publisher and stream sections copy and fill in.
MQTT_CONFIG = {"broker": "", "port": "", "topic": "", "clientId": ""}
MQTT_SUB_CONFIG = deepcopy(MQTT_CONFIG)
FRAME_WRITE_CONFIG = {"basepath": None, "filenameFormat": "{deviceId}_{frameId}", "format": "jpg"}
VIDEO_WRITE_CONFIG = {"basepath": None, "format": "mp4"}
MONGO_WRITE_CONFIG = {"host": None, "port": 27017, "authSource": None, "collection": None, "dbname": None,
                      "keys": "frameId,deviceId,timestamp,metric"}
PUBLISHER_CONFIGS = []
# Build one configuration dict per "<TYPE>:<section>" entry in PUBLISHERS.
# Values come from "<section>_<option>" environment variables when set,
# otherwise from the matching settings-file section.
# NOTE(review): options read without a fallback (MQTT broker/port/topic) still
# require the settings-file entry even when the environment variable is set,
# because the file lookup is evaluated first.
for pub in PUBLISHERS:
    if not pub.strip():
        # An unset PUBLISHERS value splits to a single blank entry; skip it.
        continue
    # Named pub_type (not `type`) so the builtin is not shadowed module-wide.
    pub_type, section = pub.split(":")
    if pub_type == "MQTT":
        conf = deepcopy(MQTT_CONFIG)
        conf["broker"] = os.environ.get("{}_broker".format(section), __config.get(section, 'broker'))
        conf["port"] = int(os.environ.get("{}_port".format(section), __config.get(section, 'port')))
        conf["topic"] = os.environ.get("{}_topic".format(section), __config.get(section, 'topic'))
        conf["clientId"] = os.environ.get("{}_clientid".format(section),
                                          __config.get(section, 'clientid', fallback="ghf-test-00"))
        conf["type"] = pub_type
        PUBLISHER_CONFIGS.append(conf)
    elif pub_type == "FRAMEWRITE":
        conf = deepcopy(FRAME_WRITE_CONFIG)
        conf["basepath"] = os.environ.get("{}_basepath".format(section),
                                          __config.get(section, 'basepath', fallback=os.path.normpath(
                                              os.getcwd() + '{sep}out{sep}frames'.format(sep=os.sep))))
        conf["iformat"] = os.environ.get("{}_iformat".format(section), __config.get(section, 'iformat', fallback="jpg"))
        conf["filenameFormat"] = os.environ.get("{}_file_name_format".format(section),
                                                __config.get(section, 'file_name_format',
                                                             fallback="{deviceId}_{frameId}"))
        if not os.path.isdir(conf["basepath"]):
            sys.stdout.write("Creating {} \n".format(conf["basepath"]))
            # The default path is nested (out/frames); mkdir alone cannot create it.
            os.makedirs(conf["basepath"], exist_ok=True)
        conf["type"] = pub_type
        PUBLISHER_CONFIGS.append(conf)
    elif pub_type == "VIDEOWRITE":
        conf = deepcopy(VIDEO_WRITE_CONFIG)
        conf["basepath"] = os.environ.get("{}_basepath".format(section),
                                          __config.get(section, 'basepath', fallback=os.path.normpath(
                                              os.getcwd() + '{sep}out{sep}videos'.format(sep=os.sep))))
        # conf["format"] = os.environ.get("{}_format".format(Section),__config.get(Section, 'file_name_format',fallback="mp4"))
        conf["filenameFormat"] = os.environ.get("{}_file_name_format".format(section),
                                                __config.get(section, 'file_name_format',
                                                             fallback="{deviceId}_{timestamp}"))
        conf["fps"] = int(os.environ.get("{}_fps".format(section),
                                         __config.get(section, 'fps',
                                                      fallback=30)))
        # NOTE(review): dims stays a tuple of strings, e.g. ("620", "320").
        conf["dims"] = tuple(os.environ.get("{}_dims".format(section),
                                            __config.get(section, 'dims',
                                                         fallback="620,320")).split(","))
        if not os.path.isdir(conf["basepath"]):
            sys.stdout.write("Creating {} \n".format(conf["basepath"]))
            # The default path is nested (out/videos); mkdir alone cannot create it.
            os.makedirs(conf["basepath"], exist_ok=True)
        conf["type"] = pub_type
        PUBLISHER_CONFIGS.append(conf)
    elif pub_type == "MONGO":
        conf = deepcopy(MONGO_WRITE_CONFIG)
        conf["host"] = os.environ.get("{}_host".format(section),
                                      __config.get(section, 'host', fallback="localhost"))
        # Fall back to the template's numeric port; the old "test" fallback
        # could never be converted by int().
        conf["port"] = int(os.environ.get("{}_port".format(section),
                                          __config.get(section, 'port', fallback=conf["port"])))
        conf["dbname"] = os.environ.get("{}_dbname".format(section),
                                        __config.get(section, 'dbname', fallback="test"))
        conf["collection"] = os.environ.get("{}_collection".format(section),
                                            __config.get(section, 'collection', fallback="test"))
        conf["authSource"] = os.environ.get("{}_authsource".format(section),
                                            __config.get(section, 'authsource', fallback=None))
        conf["username"] = os.environ.get("{}_username".format(section),
                                          __config.get(section, 'username', fallback=None))
        conf["password"] = os.environ.get("{}_password".format(section),
                                          __config.get(section, 'password', fallback=None))
        # Treat both a missing and an empty authsource as "not set".
        if not conf["authSource"]:
            conf["authSource"] = None
        conf["keys"] = os.environ.get("{}_keys".format(section),
                                      __config.get(section, 'keys',
                                                   fallback="frameId,deviceId,timestamp,metric")).split(",")
        conf["type"] = pub_type
        PUBLISHER_CONFIGS.append(conf)
    else:
        sys.stderr.write("Unsupported publisher {} \n".format(pub_type))
# Build BUILD_CONFIG for the selected STREAM_TYPE. The branches are mutually
# exclusive; an unknown stream type is rejected.
if STREAM_TYPE == "videofile":
    BUILD_CONFIG = dict(
        uri=os.environ.get("URI", __config.get('STREAM-CONFIG', 'uri', fallback=0)),
        queueSize=os.environ.get("QUEUE_SIZE",
                                 __config.get('STREAM-CONFIG', 'queue_size', fallback=128)),
        sourceType=STREAM_TYPE,
    )
elif STREAM_TYPE == "mqtt":
    # The subscriber settings are filled into the shared MQTT_SUB_CONFIG dict,
    # which then doubles as the build configuration.
    MQTT_SUB_CONFIG.update({
        "broker": os.environ.get("STREAM-CONFIG_broker", __config.get('STREAM-CONFIG', 'broker')),
        "port": os.environ.get("STREAM-CONFIG_port", __config.get('STREAM-CONFIG', 'port')),
        "topic": os.environ.get("STREAM-CONFIG_topic", __config.get('STREAM-CONFIG', 'topic')),
        "clientId": os.environ.get("STREAM-CONFIG_clientid",
                                   __config.get('STREAM-CONFIG', 'clientid', fallback="sdrt")),
    })
    MQTT_SUB_CONFIG["sourceType"] = STREAM_TYPE
    BUILD_CONFIG = MQTT_SUB_CONFIG
elif STREAM_TYPE in ("rtsp", "usbcam"):
    FRAME_WIDTH = os.environ.get("FRAME_WIDTH", __config.get('STREAM-CONFIG', 'width', fallback=480))
    FRAME_HEIGHT = os.environ.get("FRAME_HEIGHT", __config.get('STREAM-CONFIG', 'height', fallback=640))
    LATENCY = os.environ.get("LATENCY", __config.get('STREAM-CONFIG', 'latency', fallback=0))
    FRAME_RATE = os.environ.get("FRAME_RATE", __config.get('STREAM-CONFIG', 'framerate', fallback="30/1"))
    F_FORMAT = os.environ.get("F_FORMAT", __config.get('STREAM-CONFIG', 'fformat', fallback="BGRx"))
    USE_GSTREAMER = int(os.environ.get("USE_GSTREAMER", __config.get('STREAM-CONFIG', 'use-gstreamer', fallback=1)))
    # CUSTOM_PIPELINE is read here but is not part of BUILD_CONFIG.
    CUSTOM_PIPELINE = os.environ.get("CUSTOM_PIPELINE", __config.get('STREAM-CONFIG', 'custom-pipeline', fallback=None))
    URI = os.environ.get("URI", __config.get('STREAM-CONFIG', 'uri', fallback=0))
    BUILD_CONFIG = dict(
        sourceType=STREAM_TYPE,
        width=FRAME_WIDTH,
        height=FRAME_HEIGHT,
        latency=LATENCY,
        framerate=FRAME_RATE,
        format=F_FORMAT,
        gstreamer=USE_GSTREAMER,
        uri=URI,
    )
else:
    raise ValueError("unsupported source {}".format(STREAM_TYPE))
# Load and decode the licence file. A failure is reported on stderr but is not
# fatal (the exit call stays disabled), so LICENCE_PAYLOAD is defined up front
# and remains None when decoding fails.
sys.stdout.write("Loading Licence {} \n".format(LICENCE_FILE))
LICENCE_PAYLOAD = None
try:
    private_key = LicenseModule.private_key
    encoding_algorithm = LicenseModule.encoding_algorithm
    # Read through a context manager so the file handle is always closed.
    with open(LICENCE_FILE, 'rb') as licence_fh:
        licence_token = base64.b64decode(licence_fh.read())
    LICENCE_PAYLOAD = jwt.decode(licence_token, private_key,
                                 algorithms=[encoding_algorithm])
except Exception as e:
    sys.stderr.write("{}".format(e))
    sys.stderr.write(" **Error loading licence** \n")
    sys.stderr.flush()
    # sys.exit(1)
class LicenseModule:
    """Constants used to decode the licence token."""

    # Algorithm name passed to the JWT decoder.
    encoding_algorithm = "HS256"
    # Key used to verify the licence token.
    # NOTE(review): this secret is committed in source - consider loading it from the environment.
    private_key = "3139343831323738414d47454e3936363538373136"
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from edge_engine.common.config import LOG_LEVEL, LOG_HANDLER_NAME, BASE_LOG_PATH
import logging
from logging.handlers import RotatingFileHandler
from logging import WARNING,INFO,DEBUG,ERROR
import os
# Log line layouts: the debug layout adds thread, file, function and line number.
DEFAULT_FORMAT = '%(asctime)s %(levelname)5s %(name)s %(message)s'
DEBUG_FORMAT = '%(asctime)s %(levelname)5s %(name)s [%(threadName)5s:%(filename)5s:%(funcName)5s():%(lineno)s] %(message)s'
# Default extra context handed to the LoggerAdapter.
EXTRA = {}
# Use the verbose layout only when the configured level is DEBUG.
FORMATTER = DEBUG_FORMAT if LOG_LEVEL.strip() == "DEBUG" else DEFAULT_FORMAT
def get_logger(log_handler_name, extra=EXTRA):
    """
    Purpose : To create logger .
    The logger writes to the console and to a rotating file
    ``<BASE_LOG_PATH>/<log_handler_name>.log`` (10 MiB per file, 5 backups).
    :param log_handler_name: Name of the log handler.
    :param extra: extra args for the logger
    :return: logger object (a ``logging.LoggerAdapter``).
    """
    level = LOG_LEVEL.strip().upper()
    base_logger = logging.getLogger(log_handler_name)
    base_logger.setLevel(level)
    # logging.getLogger returns the same object for the same name, so attach
    # the handlers only once; otherwise a second call duplicates every line.
    if not base_logger.handlers:
        formatter = logging.Formatter(FORMATTER)

        console_handler = logging.StreamHandler()
        console_handler.setLevel(level)
        console_handler.setFormatter(formatter)

        log_path = os.path.join(BASE_LOG_PATH, log_handler_name + ".log")
        file_handler = RotatingFileHandler(log_path, maxBytes=10485760,
                                           backupCount=5)
        file_handler.setFormatter(formatter)

        base_logger.addHandler(console_handler)
        base_logger.addHandler(file_handler)
    return logging.LoggerAdapter(base_logger, extra)


# Shared logger instance imported by the rest of the package.
logger = get_logger(LOG_HANDLER_NAME)
import os, time
from minio import Minio
from edge_engine.common.logsetup import logger
class MinioClient:
    """Upload files from a local directory tree to a MinIO bucket.

    Uploaded paths are recorded in ``self.logfile`` so they are not sent again.
    """

    def __init__(self ,SECRET_KEY, ACCESS_KEY, BUCKET_NAME, LOCAL_DATA_PATH, MINIO_IP):
        """Store the settings, connect to MinIO and make sure the bucket exists.

        :param SECRET_KEY: MinIO secret key (must not be None).
        :param ACCESS_KEY: MinIO access key (must not be None).
        :param BUCKET_NAME: bucket that receives the uploads.
        :param LOCAL_DATA_PATH: local directory that ``upload`` scans.
        :param MINIO_IP: endpoint handed to the ``Minio`` client.
        """
        logger.info("Initalizing minioclient !!")
        self.SECRET_KEY = SECRET_KEY
        self.ACCESS_KEY = ACCESS_KEY
        self.BUCKET_NAME = BUCKET_NAME
        self.LOCAL_DATA_PATH = LOCAL_DATA_PATH
        self.MINIO_IP = MINIO_IP
        # One uploaded file path per line.
        self.logfile = "./logs/videowrite.log"
        self.minioClient = self.connect_to_minio()
        self.create_bucket(self.BUCKET_NAME)

    def connect_to_minio(self):
        """Return a ``Minio`` client; raise ``Exception`` when a key is None."""
        if self.SECRET_KEY is None or self.ACCESS_KEY is None:
            logger.info('Access Key and Secret Key String cannot be null')
            raise Exception('Access Key and Secret Key String cannot be null')
        logger.info("Connecting to Minio Service... !!! ")
        return Minio(self.MINIO_IP, access_key = self.ACCESS_KEY, secret_key = self.SECRET_KEY,
                     region='us-east-1', secure=False)

    def create_bucket(self, bucket_name):
        """Create ``bucket_name`` if it is missing; errors are logged, not raised."""
        try:
            if bucket_name not in self.list_buckets():
                logger.info("Creating bucket {}...".format(bucket_name))
                self.minioClient.make_bucket(bucket_name, location="us-east-1")
            else:
                logger.info("Bucket already exists....")
        except Exception as err:
            logger.error(err)

    def save_to_bucket(self, bucket_name, data_obj):
        """Upload the file at ``data_obj`` to ``bucket_name``.

        The object name is the part of the path after ``LOCAL_DATA_PATH``.

        :return: ``True`` when the upload succeeded, ``False`` when it failed
            (the error is logged). Callers use this to avoid recording a
            failed upload as done.
        """
        try:
            with open(data_obj, 'rb') as file:
                file_stat = os.stat(data_obj)
                self.minioClient.put_object(bucket_name, data_obj.split(self.LOCAL_DATA_PATH)[1],
                                            file, file_stat.st_size)
        except Exception as err:
            logger.error(err)
            return False
        return True

    def list_buckets(self):
        """Return the names of all buckets visible to the client."""
        return [bucket.name for bucket in self.minioClient.list_buckets()]

    def read_write_logs(self):
        """Return the recorded file paths, creating an empty log on first use."""
        try:
            with open(self.logfile) as log_fh:
                return [line.rstrip("\n") for line in log_fh]
        except FileNotFoundError:
            # First run: create the log, and its directory, so later appends work.
            os.makedirs(os.path.dirname(self.logfile) or ".", exist_ok=True)
            with open(self.logfile, "a"):
                pass
            return []

    def write_write_logs(self, log_str):
        """Append ``log_str`` as one line to the upload log."""
        with open(self.logfile, "a") as my_file:
            my_file.write(log_str + "\n")

    def upload(self):
        """Poll ``LOCAL_DATA_PATH`` forever and upload files not yet recorded.

        Every 5 seconds the directory tree is scanned; each new file is
        uploaded and, only if the upload succeeded, recorded in the log so a
        failed upload is retried on the next pass.
        """
        if self.LOCAL_DATA_PATH[-1]!='/':
            self.LOCAL_DATA_PATH = self.LOCAL_DATA_PATH+"/"
        while True:
            listoffiles = [os.path.join(path, name)
                           for path, subdirs, files in os.walk(self.LOCAL_DATA_PATH)
                           for name in files]
            listofwrittenfiles = self.read_write_logs()
            listofnewfiles = list(set(listoffiles) - set(listofwrittenfiles))
            for fileName in listofnewfiles:
                try:
                    logger.info("Uploading {}..".format(fileName.split(self.LOCAL_DATA_PATH)[1]))
                    if self.save_to_bucket(self.BUCKET_NAME, fileName):
                        self.write_write_logs(fileName)
                except Exception as e:
                    logger.error(e)
            time.sleep(5)
# if __name__=='__main__':
# SECRET_KEY = 'minioadmin'
# ACCESS_KEY = 'minioadmin'
# BUCKET_NAME = 'videobucket'
# MINIO_IP = '192.168.3.220:29000'
# LOCAL_DATA_PATH = "F:/GDrive Data/Downloads"
# obj = MinioClient(SECRET_KEY, ACCESS_KEY, BUCKET_NAME, LOCAL_DATA_PATH, MINIO_IP)
# obj.upload()
from edge_engine.common.logsetup import logger
from edge_engine.common.config import EDGE_CONFIG
from edge_engine.streamio.datastream import MQTT
from edge_engine.streamio.datastream import VideoOutputStream
from edge_engine.streamio.datastream import FrameOutputStream
from edge_engine.streamio.datastream import FFMPEGOutputStream
from edge_engine.streamio.datastream import MongoDataStreamOut
from edge_engine.streamio.videostream import ThreadedVideoStream
from edge_engine.streamio.videostream import FileVideoStream
from edge_engine.streamio.frameProcessor import FrameProcessor, FrameProcessorv2
from edge_engine.common.minio_server import MinioClient
import json
from threading import Thread
import time
import os
from edge_engine.streamio.videostream.filepathvideostream import FilePathVideoStream
class Pubs():
    """Container for the output publishers configured in ``EDGE_CONFIG``.

    Each attribute stays ``None`` until a matching entry is found in
    ``EDGE_CONFIG["pubConfigs"]``. When ``EDGE_CONFIG["minioConfig"]`` is a
    dict, a background MinIO upload thread is started as well.
    """

    def __init__(self):
        self.mqtt_pub = None
        self.frame_write = None
        self.video_write = None
        self.mongo_write = None
        self.rtp_write = None
        # Always defined, like the publishers above, even without a minio config.
        self.minio_thread = None
        self.build_pubs()
        minio_conf = EDGE_CONFIG.get("minioConfig")
        if isinstance(minio_conf, dict):
            self.minio_thread = self.start_minio(minio_conf)

    @staticmethod
    def start_minio(minio_conf):
        """Start ``MinioClient.upload`` in a new thread and return the thread.

        :param minio_conf: dict with ``secretKey``, ``accessKey``,
            ``bucketName``, ``localDataPath`` and ``ip``.
        """
        obj = MinioClient(minio_conf['secretKey'], minio_conf['accessKey'],
                          minio_conf['bucketName'], minio_conf['localDataPath'],
                          minio_conf['ip'])
        t = Thread(target=obj.upload)
        t.start()
        return t

    def build_pubs(self):
        """Create one publisher per entry of ``EDGE_CONFIG["pubConfigs"]``.

        The entry's ``type`` is matched case-insensitively; unsupported types
        are logged as errors and skipped.
        """
        logger.info("building publishers ")
        for conf in EDGE_CONFIG["pubConfigs"]:
            pub_type = conf["type"].upper()
            if pub_type == "MQTT":
                self.mqtt_pub = MQTT(broker=conf["broker"], topic=conf["topic"], port=conf["port"],
                                     publish_hook=json.dumps)
            elif pub_type == "FRAMEWRITE":
                self.frame_write = FrameOutputStream(
                    basepath=conf["basepath"],
                    iformat=conf["iformat"],
                    filenameFormat=conf["filenameFormat"],
                    publish_hook=None)
            elif pub_type == "VIDEOWRITE":
                self.video_write = VideoOutputStream(basepath=conf["basepath"],
                                                     dims=conf["dims"],
                                                     filenameFormat=conf["filenameFormat"],
                                                     fps=conf["fps"], publish_hook=None)
            elif pub_type == "MONGO":
                self.mongo_write = MongoDataStreamOut(host=conf["host"],
                                                      port=conf["port"],
                                                      dbname=conf["dbname"],
                                                      collection=conf["collection"],
                                                      keys=conf["keys"],
                                                      authsource=conf["authSource"],
                                                      username=conf["username"],
                                                      password=conf["password"],
                                                      publish_hook=None)
            elif pub_type == "RTP":
                self.rtp_write = FFMPEGOutputStream(conf["ffmpegCmd"], conf["RTPEndpoint"],
                                                    publish_hook=None)
            else:
                logger.error("Unsupported publisher {}".format(conf["type"]))
# Connects the input source selected by EDGE_CONFIG["inputConf"]["sourceType"] to a
# frame processor and runs the given model on it.
# NOTE(review): indentation is not visible in this view, so the nesting described in the
# comments below is inferred from the statements - confirm against the real file.
class ExecutePipeline:
# model: the model object handed to the frame processors.
# input_image: used as the `stream` argument in the "api_call" branch of run_model.
def __init__(self, model,input_image):
self.model = model
self.input_image=input_image
# Pick the stream and frame processor for the configured source type, then run the model.
# Raises ValueError for an unsupported source type.
def run_model(self):
if EDGE_CONFIG["inputConf"]["sourceType"].lower() in ["rtsp", "usbcam"]:
logger.info("Selected input stream as Direct cv input")
self.threadedVideoStream = ThreadedVideoStream(stream_config=EDGE_CONFIG["inputConf"])
self.threadedVideoStream.start()
self.frameProcessor = FrameProcessor(stream=self.threadedVideoStream,
model=self.model)
elif EDGE_CONFIG["inputConf"]["sourceType"].lower() == "videofile":
self.fileVideoStream = FileVideoStream(stream_config=EDGE_CONFIG["inputConf"])
self.fileVideoStream.start()
self.frameProcessor = FrameProcessor(stream=self.fileVideoStream, model=self.model)
elif EDGE_CONFIG["inputConf"]["sourceType"].lower() == "videopath":
# Directory polling: `uri` names a directory that is listed again on every pass of the
# endless loop; each ".mp4" file not seen before is processed once, and any other file
# is only recorded as processed.
processed_videos = []
videopath = EDGE_CONFIG["inputConf"].get('uri',"")
while True:
time1= time.time()
video_files = os.listdir(videopath)
if not video_files:
time.sleep(0.01)
continue
video_files.sort()
for video in video_files:
if video in processed_videos:
continue
if not video.endswith(".mp4"):
processed_videos.append(video)
continue
processed_videos.append(video)
# The shared EDGE_CONFIG 'uri' entry is overwritten with the current file path.
EDGE_CONFIG["inputConf"]['uri'] = os.path.join(videopath, video)
self.fileVideoStream = FilePathVideoStream(EDGE_CONFIG["inputConf"]['uri'])
self.fileVideoStream.start()
self.frameProcessor = FrameProcessorv2(stream=self.fileVideoStream, model=self.model)
self.start_model()
logger.info("---------------")
# Logs the seconds elapsed since time1 was taken.
logger.info(time.time()-time1)
# logger.info(int(time.time())-time2)
# logger.info(int(time.time())-time3)
logger.info("---------------")
elif EDGE_CONFIG["inputConf"]["sourceType"].lower() == "api_call":
# Single-image mode: the image given to __init__ is passed directly as the stream.
# processed_images is only referenced by the commented-out code below.
processed_images = []
# imagepath = EDGE_CONFIG["inputConf"].get('uri', "")
# # while True:
# #write
# time1 = time.time()
#
# image_files = os.listdir(imagepath)
# # if not image_files:
# # time.sleep(0.01)
# # continue
# image_files.sort()
# for images in image_files:
# if images in processed_images:
# continue
# if not images.endswith(".jpg"):
# processed_images.append(images)
# continue
# print(images)
#
# processed_images.append(images)
# EDGE_CONFIG["inputConf"]['uri'] = os.path.join(imagepath, images)
# print(EDGE_CONFIG["inputConf"]['uri'])
# self.fileVideoStream = FilePathVideoStream(EDGE_CONFIG["inputConf"]['uri'])
self.frameProcessor = FrameProcessorv2(stream=self.input_image, model=self.model)
self.start_model()
logger.info("---------------")
# logger.info(time.time() - time1)
# logger.info(int(time.time())-time2)
# logger.info(int(time.time())-time3)
logger.info("---------------")
else:
raise ValueError("unsupported source {}".format(EDGE_CONFIG["inputConf"]["sourceType"]))
# NOTE(review): if this call sits at method level (after the whole if/elif chain), the
# "api_call" branch runs the model twice - once above and once here. Confirm the intended
# nesting before changing anything.
self.start_model()
# Runs the current frame processor synchronously in the calling thread.
def start_model(self):
self.frameProcessor.run_model()
# self.thread = Thread(target=self.frameProcessor.run_model(), args=())
# print("running start model")
# self.thread.daemon = True
# self.thread.start()
from edge_engine.streamio.frameProcessor import FrameProcessor
from .datastreamprocessor import DataStreamProcessor
from edge_engine.streamio.datastream.datastreamwrapper import DataStreamWrapper
from edge_engine.streamio.datastream.mongodatastreamout import MongoDataStreamOut
from edge_engine.streamio.datastream.frameoutputstream import FrameOutputStream
from edge_engine.streamio.datastream.videooutputstream import VideoOutputStream
from edge_engine.streamio.datastream.mqttstream import MQTT
from edge_engine.streamio.datastream.ffmpegdata_streamout import FFMPEGOutputStream
\ No newline at end of file
from abc import ABC, abstractmethod
class DataStreamWrapper(ABC):
    """Base class for data-stream endpoints (publishers and subscribers).

    Subclasses override :meth:`publish` and/or :meth:`subscribe`; the
    defaults here are pass-through / no-op implementations.
    """

    def __init__(self):
        """Set up the wrapper; the base class holds no state."""

    def publish(self, x):
        """Publish ``x``; the base implementation just hands it back."""
        return x

    def subscribe(self, hook):
        """Register ``hook`` for incoming data; the base implementation does nothing."""
        return None
\ No newline at end of file
from edge_engine.streamio.datastream.datastreamwrapper import DataStreamWrapper
import subprocess as sp
class FFMPEGOutputStream(DataStreamWrapper):
    """Publisher that pipes raw frames into an ``ffmpeg`` child process.

    Args:
        ffmpeg_cmd: Argument list for ffmpeg, without the output endpoint.
        rtp_endpoint: Sequence whose first element is the output endpoint
            appended to the command line.
        publish_hook: Optional callable applied to each payload before the
            frame is written.
    """

    def __init__(self, ffmpeg_cmd, rtp_endpoint, publish_hook=None):
        super().__init__()
        self.rtp_endpoint = rtp_endpoint
        # Build a private copy: appending to the caller's list in place made
        # every later stream created from the same list inherit stale
        # endpoints.
        self.ffmpeg_cmd = list(ffmpeg_cmd) + [self.rtp_endpoint[0]]
        self.proc = sp.Popen(self.ffmpeg_cmd, stdin=sp.PIPE, shell=False)
        self.publish_hook = publish_hook

    def publish(self, x):
        """Write ``x["frame"]`` (after the optional hook) to ffmpeg's stdin."""
        if self.publish_hook is not None:
            x = self.publish_hook(x)
        frame = x["frame"]
        # tobytes() is the supported spelling; ndarray.tostring() is a
        # deprecated alias that newer NumPy releases removed.
        self.proc.stdin.write(frame.tobytes())
        self.proc.stdin.flush()
from edge_engine.streamio.datastream.datastreamwrapper import DataStreamWrapper
import cv2
import base64
import numpy as np
import os
from edge_engine.common.logsetup import logger
from datetime import datetime
class FrameOutputStream(DataStreamWrapper):
    """Publisher that writes each frame as an image file.

    Files are stored under ``<basepath>/<YYYY-MM-DD>/`` and named by
    formatting ``filenameFormat`` with the published payload.

    Args:
        basepath: Root directory for the dated sub-directories.
        iformat: Image file extension handed to ``cv2.imwrite``.
        filenameFormat: ``str.format`` template filled from the payload keys.
        publish_hook: Optional callable applied to the payload first.
    """

    def __init__(self, basepath, iformat="jpg", filenameFormat="{deviceId}_{frameId}_{timestamp}", publish_hook=None):
        super().__init__()
        self.basepath = basepath
        self.iformat = iformat
        self.filenameFormat = filenameFormat
        self.publish_hook = publish_hook

    def publish(self, x):
        """Write ``x["frame"]`` to disk and return ``True``."""
        if self.publish_hook is not None:
            x = self.publish_hook(x)
        frame = x["frame"]
        path = os.path.join(self.basepath, datetime.now().date().isoformat())
        logger.debug("path : {}".format(path))
        if not os.path.isdir(path):
            logger.info("Creating {} \n".format(path))
            # makedirs(exist_ok=True) also creates a missing basepath and is
            # safe if another publisher creates the directory concurrently.
            os.makedirs(path, exist_ok=True)
        target = "{path}.{iformat}".format(
            path=os.path.join(path, self.filenameFormat.format(**x)),
            iformat=self.iformat,
        )
        cv2.imwrite(target, frame)
        return True

    def subscribe(self, hook):
        """Subscribing is not supported; defers to the base no-op."""
        super().subscribe(hook)
from edge_engine.streamio.datastream.datastreamwrapper import DataStreamWrapper
from pymongo import MongoClient
class MongoDataStreamOut(DataStreamWrapper):
    """Publisher that stores selected payload keys in a MongoDB collection.

    Args:
        host, port: MongoDB server address.
        dbname: Database to write to.
        collection: Collection name inside ``dbname``.
        keys: Payload keys that are persisted; everything else is dropped.
        authsource: Stored on the instance only.
        username, password: Optional credentials passed to ``MongoClient``.
        publish_hook: Optional callable applied to the payload first.
    """

    def __init__(self, host, port, dbname, collection, keys, authsource, username=None, password=None, publish_hook=None):
        super().__init__()
        self.host = host
        self.port = port
        self.dbname = dbname
        self.username = username
        self.password = password
        self.collection = collection
        self.publish_hook = publish_hook
        self.mongo = MongoClient(host=host,
                                 port=int(port), username=self.username, password=self.password)
        self.db = self.mongo[dbname]
        self.keys = keys
        # NOTE(review): authsource is recorded but not forwarded to
        # MongoClient -- confirm whether authentication should use it.
        self.authsource = authsource

    def subscribe(self, hook=None):
        """Subscribing is not supported for this sink."""
        pass

    def publish(self, data):
        """Insert the whitelisted subset of ``data`` as one document."""
        if self.publish_hook is not None:
            data = self.publish_hook(data)
        fin_dat = {k: v for k, v in data.items() if k in self.keys}
        # Collection.insert() no longer exists in PyMongo 4 (the pinned
        # version); insert_one() is the replacement for a single document.
        self.db[self.collection].insert_one(fin_dat)
import paho.mqtt.client as paho
from edge_engine.streamio.datastream.datastreamwrapper import DataStreamWrapper
from edge_engine.common.logsetup import logger
from uuid import uuid4
import traceback
class MQTT(DataStreamWrapper):
    """MQTT endpoint that can publish to and subscribe on a single topic.

    The client connects to the broker during construction, using a random
    UUID as its client id.
    """

    def __init__(self, broker, port, topic, qos=2, subscribe_hook=None, publish_hook=None):
        super().__init__()
        self.broker = broker
        self.port = int(port)
        self.topic = topic
        self.client_name = str(uuid4())

        mqtt_client = paho.Client(self.client_name)
        self.client = mqtt_client
        mqtt_client.on_connect = self.on_connect
        mqtt_client.on_disconnect = self.on_disconnect
        mqtt_client.on_subscribe = self.on_subscribe
        mqtt_client.on_message = self.on_message
        mqtt_client.connect(host=self.broker, port=self.port)

        self.subscribe_hook = subscribe_hook
        self.publish_hook = publish_hook
        self.qos = qos

    @staticmethod
    def on_connect(client, userdata, flags, rc):
        """Log the result code reported for the connection."""
        logger.info("Connection returned with result code:{}".format(rc))

    @staticmethod
    def on_disconnect(client, userdata, rc):
        """Log the result code reported for the disconnection."""
        logger.info("Disconnection returned result:{}".format(rc))

    @staticmethod
    def on_subscribe(client, userdata, mid, granted_qos):
        """Log the subscription acknowledgement."""
        logger.debug("Subscribing MQTT {} {} {} {}".format(client, userdata, mid, granted_qos))

    def on_message(self, client, userdata, msg):
        """Log an incoming message and pass its decoded payload to the hook."""
        logger.debug("Received message, topic:" + msg.topic + "payload:" + str(msg.payload))
        hook = self.subscribe_hook
        if hook is not None:
            hook(msg.payload.decode())

    def subscribe(self, hook=None):
        """Subscribe to the topic and block in the client's network loop.

        A non-``None`` ``hook`` replaces the hook given at construction.
        """
        if hook is not None:
            self.subscribe_hook = hook
        self.client.subscribe((self.topic, self.qos))
        self.client.loop_forever()

    def publish(self, data):
        """Publish ``data`` (after the optional hook); errors are logged, not raised."""
        try:
            payload = data if self.publish_hook is None else self.publish_hook(data)
            self.client.publish(self.topic, payload)
        except Exception as e:
            logger.error(e)
            logger.error(traceback.format_exc())
from edge_engine.streamio.datastream.datastreamwrapper import DataStreamWrapper
import cv2
import base64
import numpy as np
import os
from edge_engine.common.logsetup import logger
from datetime import datetime
class VideoOutputStream(DataStreamWrapper):
    """Publisher that records frames into an MP4 file while metrics are present.

    A new file is opened under ``<basepath>/<YYYY-MM-DD>/`` when the first
    payload with a non-empty ``"metric"`` arrives, and closed again on the
    first payload whose ``"metric"`` is empty.

    Args:
        basepath: Root directory for the dated sub-directories.
        dims: ``(width, height)`` of the output video.
        filenameFormat: ``str.format`` template filled from the payload keys.
        fps: Frame rate of the output video.
        publish_hook: Optional callable applied to the payload first.
    """

    def __init__(self, basepath, dims, filenameFormat="{deviceId}_{timestamp}", fps=30, publish_hook=None):
        super().__init__()
        self.basepath = basepath
        self.dims = (int(dims[0]), int(dims[1]))
        self.fps = float(fps)
        self.filenameFormat = filenameFormat
        self.publish_hook = publish_hook
        self.four_cc = cv2.VideoWriter_fourcc(*'mp4v')
        self.out = None  # active cv2.VideoWriter, or None when not recording

    def publish(self, x):
        """Append the payload's frame to the recording and return ``True``.

        ``x["frame"]`` is expected to be a ``data:image/jpeg;base64,...``
        string, as the decoding below requires.
        """
        if self.publish_hook is not None:
            x = self.publish_hook(x)
        if len(x["metric"]) > 0:
            if self.out is None:
                path = os.path.join(self.basepath, datetime.now().date().isoformat())
                if not os.path.isdir(path):
                    logger.info("Creating {} \n".format(path))
                    # Also creates a missing basepath and tolerates races.
                    os.makedirs(path, exist_ok=True)
                self.out = cv2.VideoWriter("{}.mp4".format(os.path.join(path, self.filenameFormat.format(**x))),
                                           self.four_cc, self.fps, self.dims)
            frame = x["frame"]
            frame = base64.b64decode(frame.split("data:image/jpeg;base64,")[1])
            # frombuffer is the supported way to view raw bytes as uint8;
            # np.fromstring is deprecated for binary input.
            frame = np.frombuffer(frame, np.uint8)
            frame = cv2.imdecode(frame, cv2.IMREAD_COLOR)
            self.out.write(frame)
        else:
            if self.out is not None:
                self.out.release()
                self.out = None
        return True

    def subscribe(self, hook):
        """Subscribing is not supported; defers to the base no-op."""
        super().subscribe(hook)
from edge_engine.common.logsetup import logger
class DataStreamProcessor:
    """Connects a subscriber stream to a model and a set of publishers.

    Args:
        model: Model object kept for use by the processing hook.
        subsciber: Stream wrapper whose ``subscribe(hook=...)`` drives
            processing (parameter name kept as-is for compatibility).
        publishers: Optional list of output streams; defaults to a fresh
            empty list per instance.
    """

    def __init__(self, model, subsciber, publishers=None):
        self.model = model
        self.subsciber = subsciber
        # A `list()` default is evaluated once and shared by every instance,
        # so one processor's publishers leaked into the others.
        self.publishers = [] if publishers is None else publishers
        logger.info("Setting up frame processor !!")

    def processstream(self, msg):
        """Handle one incoming message; currently it is only printed."""
        print(msg)

    def run_model(self):
        """Start consuming the subscriber, routing messages to ``processstream``."""
        self.subsciber.subscribe(hook=self.processstream)
import time
from edge_engine.common.logsetup import logger
from edge_engine.common.config import DEVICE_ID
from uuid import uuid4
import traceback
import cv2
class FrameProcessor:
    """Reads frames from a stream wrapper and passes them to a model."""

    def __init__(self, stream, model):
        # model must expose predict(data); stream must expose read() and a
        # `.stream` attribute with isOpened() (both are used by run_model).
        self.model = model
        self.stream = stream
        logger.info("Setting up frame processor !!")
        self.count = 0  # number of read attempts so far
        self.skip_frame_every = 1  # 1 does not skip any frame (n-1 frames get skipped)

    def run_model(self):
        """Loop while the underlying capture is open, predicting on each kept frame.

        Every ``skip_frame_every``-th non-None frame is wrapped in a payload
        with a fresh UUID ``frameId`` and the configured ``deviceId`` and
        handed to ``self.model.predict``. Any exception raised inside one
        iteration is logged and the loop continues.
        """
        while self.stream.stream.isOpened():
            try:
                logger.debug("Getting frame mask_model")
                frame = self.stream.read()
                logger.debug("Running mask_model")
                self.count += 1
                if frame is not None and self.count % self.skip_frame_every == 0:
                    fid = uuid4()
                    data = {
                        "frame": frame,
                        "frameId": "{}".format(fid),
                        "deviceId": "{}".format(DEVICE_ID),
                    }
                    self.model.predict(data)
                    # NOTE(review): the source view lost its indentation, so it
                    # is unclear whether this pause belongs to the `if` body or
                    # to every loop iteration -- confirm against the repository.
                    time.sleep(0.01)
            except Exception as e:
                logger.error(e)
                logger.error(traceback.format_exc())
class FrameProcessorv2:
    """One-shot processor: treats ``stream`` itself as the frame to predict on."""

    def __init__(self, stream, model):
        self.model = model
        self.stream = stream
        self.count = 0
        self.skip_frame_every = 3  # 1 does not skip any frame (n-1 frames get skipped)

    def run_model(self):
        """Send the held frame to the model once; errors are logged, not raised."""
        try:
            image = self.stream
            self.count += 1
            if image is None:
                return
            payload = {
                "frame": image,
                "frameId": "{}".format(uuid4()),
                "deviceId": "{}".format(DEVICE_ID),
            }
            self.model.predict(payload)
        except Exception as e:
            logger.error(e)
            logger.error(traceback.format_exc())
from edge_engine.streamio.videostream.fps import FPS
from edge_engine.streamio.videostream.nvgstreamer import NVGstreamer
from edge_engine.streamio.videostream.simplevideostream import SimpleVideoStream
from edge_engine.streamio.videostream.threadedvideostream import ThreadedVideoStream
from edge_engine.streamio.videostream.filevideostream import FileVideoStream
\ No newline at end of file
# import the necessary packages
from threading import Thread
import sys
import cv2
import time
import os
import shutil
import imutils
# import the Queue class from Python 3
if sys.version_info >= (3, 0):
from queue import Queue
# otherwise, import the Queue class for Python 2.7
else:
from Queue import Queue
def frame_transform(frame):
    """Return ``frame`` resized by ``imutils.resize`` with width=416, height=416."""
    return imutils.resize(frame, width=416, height=416)
class FilePathVideoStream:
    """Single-image "stream": loads one file and serves it as one frame.

    The image is read eagerly in the constructor; the worker thread started
    by :meth:`start` applies the optional transform and places the frame on
    an internal queue for :meth:`read`.

    Args:
        path: Path of the image file to load.
        transform: Optional callable applied to the frame before queuing.
        queue_size: Capacity of the internal frame queue.
    """

    def __init__(self, path, transform=None, queue_size=256):
        self.path = path
        # cv2.imread reports an unreadable file by returning None.
        self.stream = cv2.imread(path)
        self.stopped = False
        self.transform = transform
        # read()/more()/running() all consume from this queue; it was never
        # created before, so those methods raised AttributeError.
        self.Q = Queue(maxsize=queue_size)
        self.thread = Thread(target=self.update, args=())
        self.thread.daemon = True

    def start(self):
        """Start the worker thread and return ``self`` for chaining."""
        self.thread.start()
        return self

    def update(self):
        """Queue the (transformed) frame once, then mark the stream stopped.

        Returns the frame that was produced, or ``None`` when the stream was
        already stopped or the image could not be loaded.
        """
        if self.stopped:
            return None
        frame = self.stream
        if frame is not None:
            if self.transform:
                frame = self.transform(frame)
            # Queue before flipping `stopped` so a consumer polling more()
            # never observes "stopped and empty" while the frame is pending.
            self.Q.put(frame)
        self.stopped = True
        return frame

    def read(self):
        """Return the next queued frame (blocks while the queue is empty)."""
        return self.Q.get()

    def running(self):
        """Return True while frames remain or the producer is still active.

        Once the stream is exhausted the source file is moved to the archive
        directory as a side effect.
        """
        cond = self.more() or not self.stopped
        if not cond:
            self.move_file()
        return cond

    def more(self):
        """Return True if a frame is queued, waiting up to ~0.5 s for the producer."""
        tries = 0
        while self.Q.qsize() == 0 and not self.stopped and tries < 5:
            time.sleep(0.1)
            tries += 1
        return self.Q.qsize() > 0

    def stop(self):
        """Signal the worker to stop and wait for it if it is running."""
        self.stopped = True
        # join() raises RuntimeError on a thread that was never started.
        if self.thread.is_alive():
            self.thread.join()

    def move_file(self):
        """Move the source file into the ``archive`` directory (relative to cwd)."""
        src_path = self.path
        dest_path = "archive"
        root_path = os.path.dirname(os.path.abspath(__file__))
        src = os.path.join(root_path, src_path)
        os.makedirs(dest_path, exist_ok=True)
        shutil.move(src, dest_path)
# import the necessary packages
from threading import Thread
import sys
import cv2
import time
# import the Queue class from Python 3
if sys.version_info >= (3, 0):
from queue import Queue
# otherwise, import the Queue class for Python 2.7
else:
from Queue import Queue
class FileVideoStream:
    """Threaded reader that loops a video source and buffers frames in a queue.

    Args:
        stream_config: Dict with at least ``"uri"``; ``"queueSize"`` is
            optional and defaults to 128 (the default is written back into
            the dict).
        transform: Optional callable applied to each frame on the producer
            thread before it is queued.
    """

    def __init__(self, stream_config, transform=None):
        self.transform = transform
        self.stream_config = stream_config
        self.build_pipeline()

    def start(self):
        """Start the producer thread and return ``self`` for chaining."""
        self.thread.start()
        return self

    def build_cv_obj(self):
        """Open the capture for the configured URI and clear the stop flag."""
        self.stream = cv2.VideoCapture(self.stream_config["uri"])
        self.stopped = False

    def _reopen(self):
        """Release the exhausted capture and open the source again.

        Unlike ``build_cv_obj`` this does not touch ``stopped``, so a
        concurrent ``stop()`` request is never silently cancelled.
        """
        self.stream.release()
        self.stream = cv2.VideoCapture(self.stream_config["uri"])

    def build_pipeline(self):
        """Create the capture, the frame queue and the (not yet started) thread."""
        self.build_cv_obj()
        if "queueSize" not in self.stream_config:
            self.stream_config["queueSize"] = 128
        self.Q = Queue(maxsize=int(self.stream_config["queueSize"]))
        self.thread = Thread(target=self.update, args=())
        self.thread.daemon = True

    def is_opened(self):
        """Return whether the underlying capture is open."""
        return self.stream.isOpened()

    def update(self):
        """Producer loop: read frames into the queue until ``stopped`` is set.

        When the source runs out of frames it is reopened, so the video
        loops. The capture is released when the loop exits.
        """
        while not self.stopped:
            if self.Q.full():
                time.sleep(0.1)  # queue is full; rest for 100 ms
                continue
            (grabbed, frame) = self.stream.read()
            if grabbed is False or frame is None:
                # End of file (or read failure): restart from the beginning.
                self._reopen()
                if not self.stream.isOpened():
                    # Back off instead of spinning on an unavailable source.
                    time.sleep(0.1)
                continue
            # Transforms run on the producer thread, which is usually ahead
            # of the consumer and has time to spare.
            if self.transform:
                frame = self.transform(frame)
            self.Q.put(frame)
        self.stream.release()

    def read(self):
        """Return the next queued frame (blocks while the queue is empty)."""
        return self.Q.get()

    def running(self):
        """Return True while frames remain or the producer has not stopped."""
        return self.more() or not self.stopped

    def more(self):
        """Return True if a frame is queued, waiting up to ~0.5 s for the producer."""
        tries = 0
        while self.Q.qsize() == 0 and not self.stopped and tries < 5:
            time.sleep(0.1)
            tries += 1
        return self.Q.qsize() > 0

    def stop(self):
        """Signal the producer to stop and wait until it has released the capture."""
        self.stopped = True
        self.thread.join()
\ No newline at end of file
# import the necessary packages
import datetime
class FPS:
    """Simple frames-per-second meter.

    Call :meth:`start`, then :meth:`update` once per processed frame, and
    :meth:`stop` when done; :meth:`fps` reports the average rate.
    """

    def __init__(self):
        # start time, end time, and number of frames counted in between
        self._start = None
        self._end = None
        self._numFrames = 0

    def start(self):
        """Start (or restart) the timer and return ``self`` for chaining."""
        self._start = datetime.datetime.now()
        # Clear a stale end time so a restarted timer never reports a
        # negative interval.
        self._end = None
        return self

    def stop(self):
        """Stop the timer."""
        self._end = datetime.datetime.now()

    def update(self):
        """Count one more processed frame."""
        self._numFrames += 1

    def elapsed(self):
        """Return the measured interval in seconds.

        Returns 0.0 if the timer was never started; while the timer is
        still running the interval up to "now" is returned instead of
        failing on the missing end time.
        """
        if self._start is None:
            return 0.0
        end = self._end if self._end is not None else datetime.datetime.now()
        return (end - self._start).total_seconds()

    def fps(self):
        """Return the approximate frames per second (0.0 for a zero interval)."""
        seconds = self.elapsed()
        if seconds <= 0:
            return 0.0
        return self._numFrames / seconds
import cv2
class NVGstreamer:
    """Builds an OpenCV ``VideoCapture`` from a stream configuration.

    Defaults for width/height/latency/framerate/format are merged with the
    caller's ``buildconfig``; :meth:`build_pipeline` then opens the capture
    either directly or through a GStreamer pipeline string, depending on
    ``"gStreamer"`` and ``"sourceType"``.

    Args:
        buildconfig: Dict of overrides and source settings (``"uri"``,
            ``"sourceType"``, optionally ``"gStreamer"``,
            ``"customGstPipelineString"``, ...).
    """

    def __init__(self, buildconfig):
        self.width = 480
        self.height = 640
        self.latency = 0
        self.framerate = "10/1"
        self.fformat = "BGRx"
        self.BUILD_CONFIG = {
            "width": self.width,
            "height": self.height,
            "latency": self.latency,
            "framerate": self.framerate,
            "format": self.fformat,
            "gstreamer": True
        }
        self.BUILD_CONFIG.update(buildconfig)
        # build_pipeline() reads the camel-cased "gStreamer" key, while the
        # default above is spelled "gstreamer"; without this a config that
        # omitted "gStreamer" failed with KeyError instead of using the default.
        self.BUILD_CONFIG.setdefault("gStreamer", self.BUILD_CONFIG["gstreamer"])

    def open_cam_rtsp(self):
        """Open an RTSP source through a hardware-decoding GStreamer pipeline."""
        gst_str = ('rtspsrc location={uri} latency={latency} ! '
                   'rtph264depay ! h264parse ! omxh264dec ! '
                   'nvvidconv ! videorate ! '
                   'video/x-raw, width=(int){width}, height=(int){height}, '
                   'format=(string){format}, framerate=(fraction){framerate} ! '
                   'videoconvert ! appsink').format(**self.BUILD_CONFIG)
        print(gst_str)
        return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)

    def open_cam_usb(self):
        """Open ``/dev/video<uri>`` through GStreamer so width/height can be set."""
        gst_str = ('v4l2src device=/dev/video{uri} ! '
                   'video/x-raw, width=(int){width}, height=(int){height}, '
                   'format=(string){format}, framerate=(fraction){framerate} ! '
                   'videoconvert ! appsink').format(**self.BUILD_CONFIG)
        print(gst_str)
        return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)

    def open_cam_onboard(self):
        """Open the on-board camera through the ``nvcamerasrc`` element."""
        # On versions of L4T prior to 28.1, add 'flip-method=2' into gst_str
        gst_str = ('nvcamerasrc ! '
                   'video/x-raw(memory:NVMM), '
                   'width=(int)2592, height=(int)1458, '
                   'format=(string)I420 ! '
                   'nvvidconv ! videorate ! '
                   'video/x-raw, width=(int){width}, height=(int){height}, '
                   'format=(string){format}, framerate=(fraction){framerate} !'
                   'videoconvert ! appsink').format(**self.BUILD_CONFIG)
        print(gst_str)
        return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)

    def custom_pipeline(self):
        """Open the caller-supplied ``customGstPipelineString`` verbatim."""
        gst_str = "{customGstPipelineString}".format(**self.BUILD_CONFIG)
        print(gst_str)
        return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)

    def build_pipeline(self):
        """Create ``self.cap`` for the configured source.

        Raises:
            ValueError: if GStreamer is enabled and ``sourceType`` is not one
                of ``rtsp``, ``usbcam``, ``onboard`` or ``customPipeline``.
        """
        source_type = self.BUILD_CONFIG["sourceType"]
        # Equality (not identity/truthiness) is kept on purpose: only a value
        # equal to True selects the GStreamer pipelines.
        if self.BUILD_CONFIG["gStreamer"] != True:  # noqa: E712
            if source_type == "usbcam":
                self.cap = cv2.VideoCapture(int(self.BUILD_CONFIG["uri"]))
            else:
                self.cap = cv2.VideoCapture(self.BUILD_CONFIG["uri"])
        elif source_type == "rtsp":
            self.cap = self.open_cam_rtsp()
        elif source_type == "usbcam":
            self.cap = self.open_cam_usb()
        elif source_type == "onboard":
            self.cap = self.open_cam_onboard()
        elif source_type == "customPipeline":
            self.cap = self.custom_pipeline()
        else:
            raise ValueError("unimplemented source {}".format(self.BUILD_CONFIG["sourceType"]))

    def get_stream(self):
        """Return the capture created by :meth:`build_pipeline`."""
        return self.cap
from edge_engine.common.logsetup import logger
from edge_engine.streamio.videostream import NVGstreamer
class SimpleVideoStream:
    """Unthreaded video stream: every :meth:`read` pulls a frame synchronously.

    Args:
        stream_config: Configuration dict forwarded to ``NVGstreamer``.
        name: Label stored on the instance.
    """

    def __init__(self, stream_config, name="SimpleVideoStream"):
        self.stream_config = stream_config
        self.build_pipeline()
        (self.grabbed, self.frame) = self.stream.read()
        self.name = name

    def build_pipeline(self):
        """(Re)create the capture from ``stream_config``."""
        self.gstreamer = NVGstreamer(self.stream_config)
        self.gstreamer.build_pipeline()
        self.stream = self.gstreamer.get_stream()

    def start(self):
        """Prime the stream with one read, rebuilding the capture on failure.

        Always returns ``self`` so ``SimpleVideoStream(cfg).start()`` can be
        chained; the not-opened path used to fall through and return None.
        """
        logger.info("Starting video stream ")
        if self.stream.isOpened():
            self.grabbed, self.frame = self.stream.read()
            if self.grabbed is False:
                logger.error("Empty Frame !!!! ")
                logger.error("Error opening Capture !!!! ")
                # Free the failed capture before replacing it.
                self.stream.release()
                self.build_pipeline()
        else:
            logger.error("Error opening Capture !!!! ")
            self.build_pipeline()
        return self

    def is_opened(self):
        """Return whether the underlying capture is open."""
        return self.stream.isOpened()

    def read(self):
        """Read and return the next frame.

        Raises:
            ValueError: if the capture is closed or returns no frame.
        """
        if self.stream.isOpened():
            self.grabbed, self.frame = self.stream.read()
            if self.grabbed is False:
                logger.error("Empty Frame !!!! ")
                raise ValueError("Empty Frame !!!! ")
            return self.frame
        else:
            logger.error("Error opening Capture !!!! ")
            raise ValueError("Error opening Capture !!!! ")

    def stop(self):
        """Release the capture if it is open."""
        if self.stream.isOpened():
            self.stream.release()
# import the necessary packages
from threading import Thread
import time
from edge_engine.streamio.videostream import NVGstreamer
from edge_engine.common.logsetup import logger
class ThreadedVideoStream:
    """Video stream whose frames are grabbed continuously on a daemon thread.

    :meth:`read` returns the most recently grabbed frame without blocking.

    Args:
        stream_config: Configuration dict forwarded to ``NVGstreamer``.
        name: Name given to the reader thread.
    """

    def __init__(self, stream_config, name="ThreadedVideoStream"):
        self.stream_config = stream_config
        self.build_pipeline()
        # read the first frame so read() has something to return immediately
        (self.grabbed, self.frame) = self.stream.read()
        self.name = name
        # set by stop() to make the reader thread exit
        self.stopped = False

    def build_pipeline(self):
        """(Re)create the capture from ``stream_config``."""
        self.gstreamer = NVGstreamer(self.stream_config)
        self.gstreamer.build_pipeline()
        self.stream = self.gstreamer.get_stream()

    def start(self):
        """Start the reader thread and return ``self`` for chaining."""
        t = Thread(target=self.update, name=self.name, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        """Reader loop: keep ``self.frame`` current until ``stopped`` is set.

        When a read fails the capture is released and rebuilt after a short
        pause, so a dead source neither leaks capture handles nor spins the
        CPU reconnecting.
        """
        while True:
            if self.stopped:
                return
            (self.grabbed, self.frame) = self.stream.read()
            if self.grabbed is False or self.frame is None:
                logger.error("Empty Frame !!!! ")
                logger.error("Error opening Capture !!!! ")
                self.stream.release()
                time.sleep(0.1)
                self.build_pipeline()

    def read(self):
        """Return the frame most recently read (may be None after a failure)."""
        return self.frame

    def stop(self):
        """Ask the reader thread to exit, then release the capture."""
        self.stopped = True
        # give the reader thread a moment to notice the flag
        time.sleep(0.2)
        self.stream.release()
# Copyright 2019 KnowledgeLens pvt Ltd.
VERSION = '0.0.1.alpha'
opencv-python==4.5.5.62
#pycuda==2020.1
numpy==1.19.4
requests>=2.23.0
expiringdict==1.2.1
minio==7.1.3
cachetools==4.2.4
pymongo==4.0.1
Cython==0.29.21
paho-mqtt==1.5.0
scikit-learn==0.22.2.post1
python-dateutil==2.8.2
imutils==0.5.4
\ No newline at end of file
from.cement_counter import CementBagCounter
\ No newline at end of file
import cv2
import base64
import numpy as np
from scipy.spatial import distance
from expiringdict import ExpiringDict
import time
from edge_engine.common.logsetup import logger
from scripts.utils.infocenter import MongoLogger
from yolov5processor.infer import ExecuteInference
from scripts.utils.edge_utils import get_extra_fields
from edge_engine.ai.model.modelwraper import ModelWrapper
from scripts.utils.centroidtracker import CentroidTracker
from scripts.common.constants import JanusDeploymentConstants
from scripts.utils.image_utils import draw_circles_on_frame, resize_to_64_64
from pymongo import MongoClient
from scripts.common.config import MONGO_URI
from uuid import uuid4
import cv2
import base64
import datetime
import numpy as np
import imutils
from collections import deque
from expiringdict import ExpiringDict
from scipy.optimize import linear_sum_assignment as linear_assignment
from edge_engine.common.logsetup import logger
from edge_engine.ai.model.modelwraper import ModelWrapper
from scripts.utils.tracker import Tracker
from scripts.utils.helpers import box_iou2
from scripts.utils.edge_utils import Utilities
from scripts.utils.infocenter import MongoLogger
from scripts.utils.model_tracker import ModelCountTracker
from scripts.common.constants import JanusDeploymentConstants
from yolov5processor.infer import ExecuteInference
from scripts.utils.relay_util import RelayHandler
from paddleocr import PaddleOCR
class CementBagCounter(ModelWrapper):
def __init__(self, config, model_config, pubs, device_id):
    """Set up counters, OCR reference tables, caches and the OCR model.

    Args:
        config: Dict whose ``"config"`` entry holds the model settings
            (``frame_skip`` is read from it).
        model_config: Dict read for ``uncounted_obj_length`` and
            ``uncounted_obj_age``.
        pubs: Publisher container; its ``rtp_write`` stream is used for
            video output.
        device_id: Identifier of the camera/device being served.
    """
    # The description above used to be a bare string placed after
    # super().__init__(), where it was a no-op expression, not a docstring.
    super().__init__()
    self.config = config["config"]
    self.device_id = device_id
    self.rtp = pubs.rtp_write
    self.mongo_logger = MongoLogger()
    self.frame_skip = self.config.get("frame_skip", False)

    # Counters and tracking state.
    self.count = 0
    self.cement_bag = 0
    self.count_suraksha = 0
    self.count_whitecem = 0
    self.count_gold = 0
    self.tracker_list = []
    self.max_age = 3
    self.min_hits = 0
    self.track_id_list = deque([str(i) for i in range(1, 50)])
    self.prev_annotation = []
    self.mrp_counter = 0
    self.count_nfr = 0
    self.count_suraksha_power = 0
    self.count_concrete_plus = 0
    self.count_ambuja_plus = 0
    self.text = " "
    self.mrp_text = " "
    self.recognition_output_list = []
    self.recognition_output = ""

    # Reference characters used to validate/repair the OCR reading.
    self.plant_name = ["R"]
    self.year_front_axle_bar_and_engine = ["B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","1","2","3","4"]
    self.fixed = ["M","B","N"]
    self.internal_engine = ["2", "K", "G", "A"]
    self.internal = ["T","F","A","E","X","A"]
    self.internal_front_axle_bar_2 = ["A","B","A","C","A","A"]
    self.internal_front_axle_bar = ["T","F","A","E","X","A"]
    self.fixed_and_internal_engine = ["2", "K", "C", "A"]
    self.month_engine = ["K","L","M","A","B","C","D","E","F","G","H","J"]
    self.month_front_axle_bar = ["A","B","C","D","E","F","G","H","I","J","K","L","M"]

    # Response template filled in by _predict.
    self.text_json = {"data":{"text":" ", "fixed characters":" ", "internal characters": " ", "year": " ", "month": " ", "plant": " ", "serial number": " "}, "status": False, "message": " "}
    self.initial_object_position = None
    self.uncounted_objects = ExpiringDict(
        max_len=model_config.get("uncounted_obj_length", 50),
        max_age_seconds=model_config.get("uncounted_obj_age", 60),
    )
    self.janus_metadata = ExpiringDict(max_age_seconds=120, max_len=1)
    self.mongo_alarm_coll = MongoClient(MONGO_URI)["ilens_events"][
        "triggered_alarms"
    ]
    self.camera_details = self.mongo_logger.get_camera_details(self.device_id)
    self.ocr = self.paddle_ocr_load_model()
def _pre_process(self, x):
"""
Do preprocessing here, if any
:param x: payload
:return: payload
"""
return x
def _post_process(self, x):
"""
Apply post processing here, if any
:param x: payload
:return: payload
"""
self.rtp.publish(x) # video stream
return x
def send_payload(
    self,
    frame,
    bag_type="",
    label="CementBagDetected",
    bg_color="#474520",
    font_color="#FFFF00",
    alert_sound=None,
    message="Cement Bag Detected!",
    mrp="FAIL",
    mrp_frmae="",
    mrp_roi="",
    mrp_check_result="",
):
    """Build an event document and insert it into Mongo.

    The frame is JPEG-encoded and embedded as a base64 data URI.

    :param frame: image to attach to the event
    :param bag_type: stored as ``bag_type``
    :param label: stored as ``activity``
    :param bg_color: stored as ``bg_color``
    :param font_color: stored as ``font_color``
    :param alert_sound: stored as ``alert_sound``
    :param message: stored as ``message``
    :param mrp: stored as ``mrp``
    :param mrp_frmae: stored as ``mrp_frmae``
    :param mrp_roi: stored as ``mrp_roi``
    :param mrp_check_result: stored as ``mrp_digit_check``
    :return: None
    """
    # tobytes() replaces ndarray.tostring(), a deprecated alias that newer
    # NumPy releases removed.
    jpeg_bytes = cv2.imencode(".jpg", frame)[1].tobytes()
    payload = {
        "deviceId": self.device_id,
        "message": message,
        "frame": "data:image/jpeg;base64,"
        + base64.b64encode(jpeg_bytes).decode("utf-8"),
        "activity": label,
        "bg_color": bg_color,
        "font_color": font_color,
        "alert_sound": alert_sound,
        "bag_type": bag_type,
        "mrp_frmae": mrp_frmae,
        "mrp_roi": mrp_roi,
        "mrp_digit_check": mrp_check_result,
        "mrp": mrp
    }
    self.mongo_logger.insert_attendance_event_to_mongo(payload)
def paddle_ocr_load_model(self):
    """Create the PaddleOCR engine from the model directories under ``paddleocr/model``.

    :return: a ``PaddleOCR`` instance configured for English with the
        angle classifier enabled
    """
    model_dirs = {
        "cls_model_dir": "paddleocr/model/ch_ppocr_mobile_v2.0_cls_infer",
        "rec_model_dir": "paddleocr/model/ch_PP-OCRv3_rec_infer",
        "det_model_dir": "paddleocr/model/en_PP-OCRv3_det_infer",
    }
    return PaddleOCR(lang="en", use_angle_cls=True, **model_dirs)
def paddle_ocr_predict(self, ocr, img_path):
    """Run OCR on an image and return the text of the first detected line.

    :param ocr: engine exposing ``ocr(img, cls=..., det=..., rec=...)``
    :param img_path: image (path or array) handed to the engine
    :return: recognised text of the first line, or ``""`` when nothing was
        detected (previously this case raised TypeError/IndexError)
    """
    pages = ocr.ocr(img_path, cls=False, det=True, rec=True)
    lines = pages[0] if pages else None
    if not lines:
        # No text boxes: an empty string fails the length check downstream
        # and is reported as an unclear image instead of an exception.
        return ""
    # Each line is (box, (text, score)); only the text is needed.
    return lines[0][1][0]
def check_character(self, character):
    """Classify the reading by its length.

    :param character: recognised text
    :return: ``"front_axle_bar"`` for 17 characters, ``"engine"`` for 13,
        otherwise ``False``
    """
    part_by_length = {17: "front_axle_bar", 13: "engine"}
    return part_by_length.get(len(character), False)
def fixed_char_check(self, text_list, part_name):
    """Verify the fixed prefix of a front-axle-bar reading.

    :param text_list: recognised characters
    :param part_name: part type as returned by ``check_character``
    :return: for ``"front_axle_bar"``, True when more than one of the first
        three characters matches ``self.fixed`` position by position; True
        for any other part
    """
    if part_name != "front_axle_bar":
        return True
    prefix = text_list[0:3]
    hits = sum(1 for got, want in zip(prefix, self.fixed) if got == want)
    return hits > 1
def internal_char_check(self, text_list, part_name):
    """Verify the internal characters of a reading.

    :param text_list: recognised characters
    :param part_name: part type as returned by ``check_character``
    :return: ``(ok, variant)``. For ``"front_axle_bar"`` characters 3..8 must
        match one of the two reference sequences in more than three
        positions, and ``variant`` names the sequence that matched. For any
        other part characters 3..6 must match ``self.internal_engine`` in at
        least three positions and ``variant`` is always None.
    """
    def _hits(window, reference):
        return sum(1 for got, want in zip(window, reference) if got == want)

    if part_name != "front_axle_bar":
        engine_window = text_list[3:7]
        return _hits(engine_window, self.internal_engine) >= 3, None

    axle_window = text_list[3:9]
    if _hits(axle_window, self.internal_front_axle_bar) > 3:
        return True, "front axle_bar"
    if _hits(axle_window, self.internal_front_axle_bar_2) > 3:
        return True, "front axle bar 2"
    return False, None
def plant_ver(self, text_list, part_name):
    """Check that the plant character is ``"R"``.

    :param text_list: recognised characters
    :param part_name: part type as returned by ``check_character``
    :return: True when the plant position (index 10 for ``"front_axle_bar"``,
        index 0 otherwise) holds ``"R"``
    """
    plant_index = 10 if part_name == "front_axle_bar" else 0
    return text_list[plant_index] == "R"
def year_ver(self, text_list, part_name):
    """Check that the year character is one of the known year codes.

    :param text_list: recognised characters
    :param part_name: part type as returned by ``check_character``
    :return: True when the year position (index 9 for ``"front_axle_bar"``,
        index 1 otherwise) is in ``self.year_front_axle_bar_and_engine``
    """
    year_index = 9 if part_name == "front_axle_bar" else 1
    return text_list[year_index] in self.year_front_axle_bar_and_engine
def month_ver(self, text_list, part_name):
    """Check that the month character is one of the known month codes.

    :param text_list: recognised characters
    :param part_name: part type as returned by ``check_character``
    :return: for ``"front_axle_bar"``, whether index 11 is in
        ``self.month_front_axle_bar``; otherwise whether index 2 is in
        ``self.month_engine``
    """
    if part_name == "front_axle_bar":
        month_index, allowed = 11, self.month_front_axle_bar
    else:
        month_index, allowed = 2, self.month_engine
    return text_list[month_index] in allowed
def digit_correction(self, text_list, part_name):
    """Replace look-alike characters in the serial number with digits.

    The serial number starts at index 12 for ``"front_axle_bar"`` and at
    index 7 for any other part.

    :param text_list: recognised characters
    :param part_name: part type as returned by ``check_character``
    :return: list of serial-number characters with O/o -> 0, ?/T -> 7,
        U -> 4 and I/i -> 1; other characters are kept as they are
    """
    lookalikes = {
        "O": "0", "o": "0",
        "?": "7", "T": "7",
        "U": "4",
        "I": "1", "i": "1",
    }
    serial_start = 12 if part_name == "front_axle_bar" else 7
    return [lookalikes.get(char, char) for char in text_list[serial_start:]]
def defining_actual_text(self, text_list, part_name, fixed_verification, internal_verification, chassis_part, plant_verification, year_verification, month_verification, corrected_digit):
    """Assemble the cleaned-up text from the verified pieces of a reading.

    :param text_list: recognised characters
    :param part_name: part type as returned by ``check_character``
    :param fixed_verification: result of ``fixed_char_check``
    :param internal_verification: first result of ``internal_char_check``
    :param chassis_part: second result of ``internal_char_check``; selects
        which front-axle reference sequence is emitted
    :param plant_verification: result of ``plant_ver``
    :param year_verification: result of ``year_ver``
    :param month_verification: result of ``month_ver``
    :param corrected_digit: serial number from ``digit_correction``
    :return: ``(characters, True)`` on success, or
        ``("Image not clear", False)`` when any verification failed
    """
    verifications = (
        fixed_verification,
        internal_verification,
        plant_verification,
        year_verification,
        month_verification,
    )
    # A failed internal check used to be skipped silently, yielding a text
    # that was too short yet flagged as valid; the caller then sliced it at
    # fixed positions. It is now rejected like every other failed check.
    if not all(verifications):
        return "Image not clear", False

    if part_name == "front_axle_bar":
        if chassis_part == "front axle_bar":
            internal = self.internal_front_axle_bar
        else:
            internal = self.internal_front_axle_bar_2
        # Emitted order is year, month, plant -- the positions the caller
        # reads back (index 9, 10 and 11 of the result).
        actual_text = [*self.fixed, *internal, text_list[9], text_list[11], "R"]
    else:
        actual_text = ["R", text_list[1], text_list[2], *self.internal_engine]
    actual_text.extend(corrected_digit)
    return actual_text, True
def _predict(self, obj):
    """
    Run OCR on the incoming frame and publish the decoded marking in
    ``self.text_json``.

    The recognised text is validated piece by piece (fixed, internal, plant,
    year, month, serial number). On success the individual fields are written
    to ``self.text_json["data"]`` and the status is set to True. When the part
    cannot be identified or a mandatory check fails, an "Image not clear"
    result is written instead, so a previous frame's result is never
    republished.

    :param obj: payload carrying the image under "frame"
    :return: the same payload with "frame" resized to the configured size
    """
    try:
        frame = obj["frame"]
        text = self.paddle_ocr_predict(self.ocr, frame)
        print("detected text is ---------------", text)
        part_name = self.check_character(text)

        redefined_text, status = None, False
        if part_name:
            text_list = list(text)
            fixed_verification = self.fixed_char_check(text_list, part_name)
            internal_verification, chassis_part = self.internal_char_check(text_list, part_name)
            plant_verification = self.plant_ver(text_list, part_name)
            year_verification = self.year_ver(text_list, part_name)
            month_verification = self.month_ver(text_list, part_name)
            corrected_digit = self.digit_correction(text_list, part_name)
            redefined_text, status = self.defining_actual_text(
                text_list, part_name, fixed_verification, internal_verification, chassis_part,
                plant_verification, year_verification, month_verification, corrected_digit)

        if status:
            if part_name == "front_axle_bar":
                # Axle layout: 3 fixed, 6 internal, year, month, plant, serial.
                fixed = ''.join(str(x) for x in redefined_text[0:3])
                internal_char = redefined_text[3:9]
                year_char = redefined_text[9]
                month_char = redefined_text[10]
                serial_number_char = redefined_text[12:]
                month_names = {"H": "August", "C": "March"}
                print("MONTH------------------")
                print(month_char)
                print("****************************************************************")
            else:
                # Engine layout: plant, year, month, 4 internal, serial.
                fixed = " "
                internal_char = redefined_text[3:7]
                year_char = redefined_text[1]
                month_char = redefined_text[2]
                serial_number_char = redefined_text[7:]
                # NOTE(review): "F" maps to September here while the axle
                # layout uses "H" for August - confirm the month code table.
                month_names = {"F": "September"}

            # Codes without a known mapping are reported as a single blank.
            year = "2022" if year_char == "N" else " "
            month = month_names.get(month_char, " ")
            plant = "M$M"

            self.recognition_output = ''.join(str(x) for x in redefined_text)
            self.text_json["data"]["text"] = self.recognition_output
            self.text_json["data"]["fixed characters"] = fixed
            self.text_json["data"]["internal characters"] = ''.join(str(x) for x in internal_char)
            self.text_json["data"]["year"] = year
            self.text_json["data"]["month"] = month
            self.text_json["data"]["plant"] = plant
            self.text_json["data"]["serial number"] = ''.join(str(x) for x in serial_number_char)
            self.text_json["status"] = True
        else:
            self.recognition_output = "Image not clear"
            self.text_json["data"]["text"] = self.recognition_output
            self.text_json["status"] = False
            self.text_json["message"] = "Image is not clear"

        print(self.text_json)
        obj["frame"] = cv2.resize(
            frame, (self.config.get("FRAME_WIDTH"), self.config.get("FRAME_HEIGHT"))
        )
    except Exception as e:
        logger.exception(f"Error: {e}", exc_info=True)
        obj["frame"] = cv2.resize(
            obj["frame"],
            (self.config.get("FRAME_WIDTH"), self.config.get("FRAME_HEIGHT")),
        )
    return obj
import cv2
import base64
import numpy as np
from scipy.spatial import distance
from expiringdict import ExpiringDict
from edge_engine.common.logsetup import logger
from scripts.utils.infocenter import MongoLogger
from yolov5processor.infer import ExecuteInference
from scripts.utils.edge_utils import get_extra_fields
from edge_engine.ai.model.modelwraper import ModelWrapper
from scripts.utils.centroidtracker import CentroidTracker
from scripts.common.constants import JanusDeploymentConstants
from scripts.utils.image_utils import draw_circles_on_frame, resize_to_64_64
from scripts.utils.edge_utils import Utilities
from collections import deque
from scripts.utils.tracker import Tracker
from scripts.utils.helpers import box_iou2
from sklearn.utils.linear_assignment_ import linear_assignment
class CementBagCounter(ModelWrapper):
def __init__(self, config, model_config, pubs, device_id):
    """
    Set up the detector, the trackers and the counting state for one device.

    :param config: mapping whose "config" entry holds the runtime settings
    :param model_config: mapping with optional detector / tracker overrides
    :param pubs: publisher container; its ``rtp_write`` is used for the video stream
    :param device_id: identifier of the device this counter serves
    """
    super().__init__()
    self.config = config["config"]
    self.device_id = device_id
    self.rtp = pubs.rtp_write
    self.mongo_logger = MongoLogger()
    self.frame_skip = self.config.get('frame_skip', False)

    # Detector
    model = "data/ACC_v3.pt"
    self.yp = ExecuteInference(weight=model,
                               gpu=model_config.get("gpu", False),
                               agnostic_nms=model_config.get("agnostic_nms", True),
                               iou=model_config.get("iou", 0.2),
                               confidence=model_config.get("confidence", 0.4))
    self.print_eu_dist = model_config.get('print_eu_dist', 200)

    # Centroid trackers (bags and print labels are tracked separately)
    self.ct1 = CentroidTracker(maxDisappeared=5)
    self.ct2 = CentroidTracker(maxDisappeared=5)
    self.frame_skipping = {
        "skip_current_frame": True,
        "detection_value": None
    }

    # Counters
    self.count = 0
    self.cement_bag = 0
    self.count_suraksha = 0
    self.count_whitecem = 0
    self.count_gold = 0
    self.mrp_counter = 0

    # Side of the line on which bags first appear, as stored for this device.
    # It used to be reset to None further down, which discarded the stored
    # value; update_bag_count still initialises it when it is not a bool.
    self.initial_object_position = Utilities.get_direction(self.device_id)

    # Kalman tracker state
    self.tracker_list = []
    self.max_age = 15
    self.min_hits = 10
    self.track_id_list = deque([str(i) for i in range(1, 50)])
    self.prev_annotation = []

    self.uncounted_objects = ExpiringDict(max_len=model_config.get("uncounted_obj_length", 50),
                                          max_age_seconds=model_config.get("uncounted_obj_age", 60))
    self.janus_metadata = ExpiringDict(max_age_seconds=120, max_len=1)
def _pre_process(self, x):
"""
Do preprocessing here, if any
:param x: payload
:return: payload
"""
return x
def _post_process(self, x):
"""
Apply post processing here, if any
:param x: payload
:return: payload
"""
self.rtp.publish(x) # video stream
return x
def send_payload(self, frame, label='CementBagDetected', bg_color="#474520", font_color="#FFFF00", alert_sound=None,
                 message="Cement Bag Detected!"):
    """
    Insert event to Mongo

    The frame is JPEG-encoded and embedded as a base64 data URI.

    :param frame: image to attach to the event
    :param label: stored under "activity"
    :param bg_color: stored under "bg_color"
    :param font_color: stored under "font_color"
    :param alert_sound: stored under "alert_sound"
    :param message: stored under "message"
    :return: None
    """
    # tobytes() replaces the deprecated ndarray.tostring() alias; the bytes
    # produced are the same.
    jpeg_bytes = cv2.imencode('.jpg', frame)[1].tobytes()
    encoded_frame = 'data:image/jpeg;base64,' + base64.b64encode(jpeg_bytes).decode("utf-8")
    payload = {"deviceId": self.device_id, "message": message,
               "frame": encoded_frame, "activity": label,
               "bg_color": bg_color, "font_color": font_color, "alert_sound": alert_sound}
    self.mongo_logger.insert_attendance_event_to_mongo(payload)
def track_bags(self, tracker_obj, dets, im0, filter_name, centroid_color=(255, 0, 0)):
    """
    Track the bags using Centroid based tracking

    :param tracker_obj: tracker whose ``update`` receives the filtered boxes
    :param dets: prediction output (dicts with "class" and "points")
    :param im0: raw frame
    :param filter_name: class names to keep from ``dets``
    :param centroid_color: color given to the centroid marking; pass False to skip drawing
    :return: tracked objects, class names of the kept detections, frame
    """
    bags = []
    classes = []
    for det in dets:
        if det["class"] in filter_name:
            bags.append(np.array(det['points']).astype("int"))
            classes.append(det["class"])

    objects = tracker_obj.update(bags)
    objects.pop("frame", None)

    if centroid_color is not False:
        for objectID, centroid in objects.items():
            # Choose the colour per object. Reassigning centroid_color here
            # made every object after the first printed one green as well.
            color = (0, 255, 0) if centroid['has_print'] else centroid_color
            cx, cy = centroid['centroid'][0], centroid['centroid'][1]
            cv2.putText(im0, str(objectID), (cx - 10, cy - 10),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1, color, 2, cv2.LINE_AA)
            cv2.circle(im0, (cx, cy), 8, color, -1)
    return objects, classes, im0
def kalman_tracker(
        self,
        bboxs,
        img,
):
    """
    Update the Kalman tracks with the detections of the current frame.

    :param bboxs: detected boxes for this frame
    :param img: current frame, returned unchanged
    :return: img, list of [track id, centroid] for reportable tracks, and the
             boxes of those tracks
    """
    def _state_to_box(track):
        # The box values sit at the even indices of the state vector.
        state = track.x_state.T[0].tolist()
        return [state[0], state[2], state[4], state[6]]

    detections = bboxs
    track_boxes = [trk.box for trk in self.tracker_list]
    matched, unmatched_dets, unmatched_trks = self.assign_detections_to_trackers(track_boxes, detections,
                                                                                 iou_thrd=0.01)

    # Matched detections: correct the track with the measurement.
    if matched.size > 0:
        for trk_idx, det_idx in matched:
            measurement = np.expand_dims(detections[det_idx], axis=0).T
            track = self.tracker_list[trk_idx]
            track.kalman_filter(measurement)
            box = _state_to_box(track)
            track_boxes[trk_idx] = box
            track.box = box
            track.hits += 1

    # Unmatched detections: start a new track and take an id from the pool.
    for det_idx in unmatched_dets:
        measurement = np.expand_dims(detections[det_idx], axis=0).T
        track = Tracker()
        track.x_state = np.array([[measurement[0], 0, measurement[1], 0,
                                   measurement[2], 0, measurement[3], 0]]).T
        track.predict_only()
        box = _state_to_box(track)
        track.box = box
        track.id = self.track_id_list.popleft()
        self.tracker_list.append(track)
        track_boxes.append(box)

    # Unmatched tracks: count the miss and coast on the prediction.
    for trk_idx in unmatched_trks:
        track = self.tracker_list[trk_idx]
        track.no_losses += 1
        track.predict_only()
        box = _state_to_box(track)
        track.box = box
        track_boxes[trk_idx] = box

    # Report tracks that have enough hits and have not been lost for too long.
    objects = []
    boxs = []
    for track in self.tracker_list:
        if track.hits < self.min_hits or track.no_losses > self.max_age:
            continue
        box = track.box
        left, right, bottom = box[1], box[3], box[2]
        objects.append([int(track.id), [int(left + ((right - left) / 2)), bottom]])
        boxs.append(box)

    # Drop expired tracks and give their ids back to the pool.
    survivors = []
    for track in self.tracker_list:
        if track.no_losses > self.max_age:
            self.track_id_list.append(track.id)
        else:
            survivors.append(track)
    self.tracker_list = survivors

    print("object is ", str(objects))
    return img, objects, boxs
@staticmethod
def assign_detections_to_trackers(
        trackers,
        detections,
        iou_thrd=0.3,
):
    """
    Pair current tracker boxes with new detections by their IOU.

    :param trackers: boxes of the existing tracks
    :param detections: boxes detected in the current frame
    :param iou_thrd: pairs whose IOU is below this value are treated as unmatched
    :return: matches as an (n, 2) array of [tracker index, detection index],
             unmatched detection indices, unmatched tracker indices
    """
    iou_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)
    for row, trk_box in enumerate(trackers):
        for col, det_box in enumerate(detections):
            iou_mat[row, col] = box_iou2(trk_box, det_box)

    # The IOU matrix is negated before assignment - presumably because
    # linear_assignment minimises cost; confirm against the solver in use.
    pairs = linear_assignment(-iou_mat)

    unmatched_trackers = [t for t in range(len(trackers)) if t not in pairs[:, 0]]
    unmatched_detections = [d for d in range(len(detections)) if d not in pairs[:, 1]]

    accepted = []
    for pair in pairs:
        if iou_mat[pair[0], pair[1]] < iou_thrd:
            # Assigned but overlapping too little: treat both sides as unmatched.
            unmatched_trackers.append(pair[0])
            unmatched_detections.append(pair[1])
        else:
            accepted.append(pair.reshape(1, 2))

    if accepted:
        matches = np.concatenate(accepted, axis=0)
    else:
        matches = np.empty((0, 2), dtype=int)
    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
def get_line_coordinates(self):
    """
    Get the line coordinates from the deployment JSON

    The metadata is cached in ``self.janus_metadata`` and fetched again with
    ``get_extra_fields`` when the cache has no usable entry.

    :return: (alignment, list of the values stored under each key of
             JanusDeploymentConstants.LINE_COORDINATES)
    """
    # Read the cache once and work on the local reference: indexing the
    # expiring cache again after the check could raise KeyError if the entry
    # expired in between.
    metadata = self.janus_metadata.get('metadata')
    if not metadata:
        metadata = get_extra_fields(self.device_id)
        self.janus_metadata['metadata'] = metadata
    _coordinates = [metadata.get(coordinate_key) for coordinate_key in
                    JanusDeploymentConstants.LINE_COORDINATES]
    _alignment = metadata.get(JanusDeploymentConstants.ALIGNMENT_KEY)
    return _alignment, _coordinates
def line_point_position(self, point):
    """
    Get the position of point w.r.t. the line

    :param point: (x, y) point to be compared
    :return: boolean side indicator - True when
             ``y - y1 - slope * (x - x1)`` is positive; for a perfectly
             vertical line, True when the point is to the right of it
    """
    _alignment, line_coordinates = self.get_line_coordinates()
    assert len(line_coordinates) == 4, "Line coordinates variable is invalid"
    assert len(point) == 2, "Point variable is invalid"
    x1, y1 = line_coordinates[0], line_coordinates[1]
    x2, y2 = line_coordinates[2], line_coordinates[3]
    dx = x2 - x1
    if dx == 0:
        # The slope is undefined here; dividing used to raise ZeroDivisionError.
        return bool(point[0] > x1)
    # Same test as the slope form, multiplied through by dx to avoid the
    # division; the sign of the product flips when dx is negative.
    scaled = (point[1] - y1) * dx - (y2 - y1) * (point[0] - x1)
    return bool(scaled > 0) if dx > 0 else bool(scaled < 0)
def validate_point_position(self, point):
    """
    Check that the point falls strictly within the span of the counting line.

    For a vertical alignment the y value of the point is compared with the
    y values of the line; otherwise the x values are compared.

    :param point: centroid
    :return: bool
    """
    _alignment, line_coordinates = self.get_line_coordinates()
    assert _alignment in [JanusDeploymentConstants.VERTICAL, JanusDeploymentConstants.HORIZONTAL], \
        "Invalid alignment variable"
    if _alignment == JanusDeploymentConstants.VERTICAL:
        start, end, value = line_coordinates[1], line_coordinates[3], point[1]
    else:
        start, end, value = line_coordinates[0], line_coordinates[2], point[0]
    # The line ends may be given in either order.
    return bool(start < value < end or end < value < start)
def update_bag_count(self, frame, detection_objects, classes):
    """
    Maintains the bag counts

    An object is remembered while it is on the initial side of the line and
    counted, by class name, the first time it is seen on the other side.

    :param frame: image
    :param detection_objects: detection object having object id and centroids
                              (each value exposes 'centroid' and 'has_print')
    :param classes: class names, paired positionally with detection_objects
    :return: frame with the markers and the count text drawn on it
    """
    for class_name, (objectID, centroid) in zip(classes, detection_objects.items()):
        position = centroid['centroid']
        if not self.validate_point_position(position):
            continue
        logger.debug("centroid detected")
        if not isinstance(self.initial_object_position, bool):
            # The first validated object defines the side bags come from.
            logger.debug("Initializing the initial object position")
            self.initial_object_position = self.line_point_position(point=position)
            Utilities.set_direction(self.device_id, self.initial_object_position)
            logger.debug(self.initial_object_position)
        _point_position = self.line_point_position(point=position)
        # A placeholder is needed for the argument: without it the logging
        # module reports a formatting error instead of the message.
        logger.debug("object ID is : %s", objectID)
        logger.debug(self.uncounted_objects)

        if _point_position == self.initial_object_position:
            # Still on the initial side: remember the object until it crosses.
            logger.debug("same side only")
            if objectID not in self.uncounted_objects:
                self.uncounted_objects[objectID] = position
                frame = draw_circles_on_frame(frame, position, radius=10, color=(0, 0, 255),
                                              thickness=-1)
        elif objectID in self.uncounted_objects:
            # Crossed the line: count it once.
            logger.debug("different side")
            self.uncounted_objects.pop(objectID, None)
            # NOTE(review): "ambuja_whitecem" is counted here while _predict
            # filters detections on "ambuja_buildcem" - confirm the class name.
            if class_name == "acc_gold":
                self.count_gold += 1
                logger.debug(self.count_gold)
            elif class_name == "acc_suraksha_plus":
                self.count_suraksha += 1
                logger.debug(self.count_suraksha)
            elif class_name == "ambuja_whitecem":
                self.count_whitecem += 1
                logger.debug(self.count_whitecem)
            frame = draw_circles_on_frame(frame, position, radius=10, color=(0, 255, 0),
                                          thickness=-1)
            if centroid['has_print']:
                self.send_payload(resize_to_64_64(frame=frame), message='Print Detected!')
                logger.info(f"Count: {self.count}, Print Found: True")
            else:
                self.send_payload(resize_to_64_64(frame=frame), message='Print Missing!')
                logger.info(f"Count: {self.count}, Print Found: False")
        else:
            frame = draw_circles_on_frame(frame, position, radius=10, color=(0, 255, 0),
                                          thickness=-1)

    count_text_gold = "ACC_GOLD: " + str(self.count_gold)
    count_text_suraksha = "ACC_SURAKSHA_P_PLUS: " + str(self.count_suraksha)
    count_text_whitecem = "PPC_WHITE: " + str(self.count_whitecem)
    cv2.putText(frame, count_text_gold, (1300, 200), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 3,
                cv2.LINE_AA)
    cv2.putText(frame, count_text_suraksha, (1300, 400), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 3,
                cv2.LINE_AA)
    cv2.putText(frame, count_text_whitecem, (1300, 600), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 3,
                cv2.LINE_AA)
    return frame
def draw_line_over_image(self, frame, color=(255, 255, 255)):
    """
    Mark the counting line on the frame.

    :param frame: frame to draw on
    :param color: colour passed on to ``drawline``
    :return: the frame
    """
    _alignment, line_coordinates = self.get_line_coordinates()
    assert len(line_coordinates) == 4, "Line coordinates variable is invalid"
    start = (line_coordinates[0], line_coordinates[1])
    end = (line_coordinates[2], line_coordinates[3])
    self.drawline(frame, start, end, color, thickness=3)
    return frame
@staticmethod
def drawline(img, pt1, pt2, color, thickness=1, style='dotted', gap=20):
dist = ((pt1[0] - pt2[0]) ** 2 + (pt1[1] - pt2[1]) ** 2) ** .5
pts = []
for i in np.arange(0, dist, gap):
r = i / dist
x = int((pt1[0] * (1 - r) + pt2[0] * r) + .5)
y = int((pt1[1] * (1 - r) + pt2[1] * r) + .5)
p = (x, y)
pts.append(p)
if style == 'dotted':
for p in pts:
cv2.circle(img, p, thickness, color, -1)
else:
s = pts[0]
e = pts[0]
i = 0
for p in pts:
s = e
e = p
if i % 2 == 1:
cv2.line(img, s, e, color, thickness)
i += 1
def distances(self, objs1, objs2):
# Compare every entry of objs1 with every entry of objs2 using the Euclidean
# distance between their 'centroid' values, and update objs1 in place:
#   * an objs1 entry that is not yet flagged gets 'has_print' = True when an
#     objs2 centroid is closer than self.print_eu_dist;
#   * an objs1 entry that is already flagged only increments
#     self.mrp_counter for each objs2 entry it is compared with.
# Nothing is returned. With an empty objs2 the inner loop never runs.
# NOTE(review): presumably objs1 are tracked bags and objs2 tracked print
# labels (see the call in _predict) - confirm.
for key1, val1 in objs1.items():
for key2, val2 in objs2.items():
dst = distance.euclidean(val1['centroid'], val2['centroid'])
if objs1[key1]['has_print']:
self.mrp_counter += 1
# Placeholder: nothing happens yet once the counter reaches 5.
if(self.mrp_counter >= 5):
#STOP THE RELAY
pass
continue
elif dst < self.print_eu_dist:
objs1[key1]['has_print'] = True
# The counter is reset to zero here.
self.mrp_counter = 0
# def inference(
# self,
# frame,
# classes,
#
# ):
# dets = self.yp.predict(frame)
# class_name = list()
# bboxs = []
#
# if dets:
# for i in dets:
# if(i["class"] in classes):
# class_name.append(i["class"])
# bboxs.append([i["points"][1], i["points"][0], i["points"][3], i["points"][2]])
#
# print("#######")
# print(bboxs)
# #frame = cv2.rectangle(frame, (bboxs[0][0], bboxs[0][1]), (bboxs[0][2], bboxs[0][3]),(255, 255, 0) , 2)
# return bboxs, frame, dets, class_name
def inference(
        self,
        frame,
):
    """
    Run the detector on a frame and collect the boxes of all detections.

    :param frame: image to run the detector on
    :return: boxes with each "points" entry reordered as
             [points[1], points[0], points[3], points[2]], the frame, and the
             raw detector output
    """
    dets = self.yp.predict(frame)
    bboxs = [
        [det["points"][1], det["points"][0], det["points"][3], det["points"][2]]
        for det in (dets or [])
    ]
    return bboxs, frame, dets
def _predict(self, obj):
# Per-frame entry point: detect, track, count and annotate, then store the
# resized frame back into obj['frame']. Any exception is logged and the
# incoming frame is resized instead; obj is returned in both cases.
# Class names handed to track_bags as the bag filter.
class_list = ["acc_gold", "acc_suraksha_plus", "ambuja_buildcem"]
try:
frame = obj['frame']
# With frame skipping enabled, calls alternate between running the detector
# and reusing the detections cached by the previous run.
if self.frame_skip:
if not self.frame_skipping["skip_current_frame"]:
dets = self.yp.predict(frame)
self.frame_skipping["detection_value"] = dets
self.frame_skipping["skip_current_frame"] = True
else:
dets = self.frame_skipping["detection_value"]
self.frame_skipping["skip_current_frame"] = False
else:
# Here dets holds the reordered boxes and _dets the raw detector output.
dets, frame, _dets = self.inference(frame)
print("PRINTING INFERENCE FUNCTION OUTPUT")
print(dets)
print(_dets)
#print(class_name)
#if dets:
# The Kalman tracker output is only printed; it is not used further below.
frame, objects, boxs = self.kalman_tracker(dets, frame)
print("PRINTING KALMAN OUTPUTS")
print(objects)
print(boxs)
# The detector runs again here and replaces dets for the centroid trackers.
dets = self.yp.predict(frame)
frame = self.draw_line_over_image(frame)
# if [True for e in dets if e['class'] == 'cement_bag']:
#class_list = ["acc_gold", "acc_suraksha_plus", "ambuja_whitecem"]
mrp = ["mrp"]
# ct1 tracks the bag classes, ct2 the "mrp" detections; `_` keeps the
# tracked "mrp" objects for the distances() call below.
objects,classes_cement, frame = self.track_bags(self.ct1, dets, frame, class_list)
_,classes, frame = self.track_bags(self.ct2, dets, frame, mrp)
frame = self.update_bag_count(frame=frame, detection_objects=objects, classes = classes_cement)
# NOTE(review): imshow/waitKey display every frame in a window - presumably
# a debugging aid; confirm it is wanted in deployment.
cv2.imshow("output is ", cv2.resize(frame, (900, 600)))
cv2.waitKey(1)
self.distances(objects, _)
logger.debug("self.uncounted_objects --> {}".format(self.uncounted_objects))
# for each in dets:
# color = (255, 255, 0)
# class_n = "Cement Bag"
#
# if each['class'] == 'label':
# color = (0, 255, 0)
# class_n = "Printing Detected!"
# cv2.rectangle(frame, (each['points'][0], each['points'][1]), (each['points'][2], each['points'][3]),
# color, 2)
# cv2.putText(frame, class_n, (each['points'][2], each['points'][1]), cv2.FONT_HERSHEY_SIMPLEX,
# 1, color, 2, cv2.LINE_AA)
obj['frame'] = cv2.resize(frame, (self.config.get('FRAME_WIDTH'), self.config.
get('FRAME_HEIGHT')))
except Exception as e:
# Fall back to the untouched input frame so the payload stays usable.
logger.exception(f"Error: {e}", exc_info=True)
obj['frame'] = cv2.resize(obj['frame'], (self.config.get('FRAME_WIDTH'), self.config.get('FRAME_HEIGHT')))
return obj
import cv2
import base64
import numpy as np
from scipy.spatial import distance
from expiringdict import ExpiringDict
from edge_engine.common.logsetup import logger
from scripts.utils.infocenter import MongoLogger
from yolov5processor.infer import ExecuteInference
from scripts.utils.edge_utils import get_extra_fields
from edge_engine.ai.model.modelwraper import ModelWrapper
from scripts.utils.centroidtracker import CentroidTracker
from scripts.common.constants import JanusDeploymentConstants
from scripts.utils.image_utils import draw_circles_on_frame, resize_to_64_64
import cv2
import base64
import datetime
import numpy as np
from collections import deque
from expiringdict import ExpiringDict
from sklearn.utils.linear_assignment_ import linear_assignment
from edge_engine.common.logsetup import logger
from edge_engine.ai.model.modelwraper import ModelWrapper
from scripts.utils.tracker import Tracker
from scripts.utils.helpers import box_iou2
from scripts.utils.edge_utils import Utilities
from scripts.utils.infocenter import MongoLogger
from scripts.utils.model_tracker import ModelCountTracker
from scripts.common.constants import JanusDeploymentConstants
from yolov5processor.infer import ExecuteInference
class CementBagCounter(ModelWrapper):
def __init__(self, config, model_config, pubs, device_id):
    """
    Build the counter for one device: detector, trackers and counting state.

    :param config: mapping whose "config" entry holds the runtime settings
    :param model_config: mapping with optional detector / tracker overrides
    :param pubs: publisher container; its ``rtp_write`` is used for the video stream
    :param device_id: identifier of the device this counter serves
    """
    super().__init__()
    self.config = config["config"]
    self.device_id = device_id
    self.rtp = pubs.rtp_write
    self.mongo_logger = MongoLogger()
    self.frame_skip = self.config.get('frame_skip', False)

    # Detector
    weights_path = "data/ACC_v3.pt"
    detector_options = dict(
        gpu=model_config.get("gpu", False),
        agnostic_nms=model_config.get("agnostic_nms", True),
        iou=model_config.get("iou", 0.2),
        confidence=model_config.get("confidence", 0.4),
    )
    self.yp = ExecuteInference(weight=weights_path, **detector_options)
    self.print_eu_dist = model_config.get('print_eu_dist', 200)

    # Centroid trackers and frame-skipping state
    self.ct1 = CentroidTracker(maxDisappeared=5)
    self.ct2 = CentroidTracker(maxDisappeared=5)
    self.frame_skipping = {"skip_current_frame": True, "detection_value": None}

    # Counters
    self.count = 0
    self.cement_bag = 0
    self.count_suraksha = 0
    self.count_whitecem = 0
    self.count_gold = 0

    # Kalman tracker state
    self.tracker_list = []
    self.max_age = 3
    self.min_hits = 0
    self.track_id_list = deque([str(i) for i in range(1, 50)])
    self.prev_annotation = []
    self.initial_object_position = None

    # Expiring caches
    uncounted_len = model_config.get("uncounted_obj_length", 50)
    uncounted_age = model_config.get("uncounted_obj_age", 60)
    self.uncounted_objects = ExpiringDict(max_len=uncounted_len, max_age_seconds=uncounted_age)
    self.janus_metadata = ExpiringDict(max_age_seconds=120, max_len=1)
def _pre_process(self, x):
"""
Do preprocessing here, if any
:param x: payload
:return: payload
"""
return x
def _post_process(self, x):
"""
Apply post processing here, if any
:param x: payload
:return: payload
"""
self.rtp.publish(x) # video stream
return x
def send_payload(self, frame, label='CementBagDetected', bg_color="#474520", font_color="#FFFF00", alert_sound=None,
                 message="Cement Bag Detected!"):
    """
    Insert event to Mongo

    The frame is JPEG-encoded and embedded as a base64 data URI.

    :param frame: image to attach to the event
    :param label: stored under "activity"
    :param bg_color: stored under "bg_color"
    :param font_color: stored under "font_color"
    :param alert_sound: stored under "alert_sound"
    :param message: stored under "message"
    :return: None
    """
    # tobytes() replaces the deprecated ndarray.tostring() alias; the bytes
    # produced are the same.
    jpeg_bytes = cv2.imencode('.jpg', frame)[1].tobytes()
    encoded_frame = 'data:image/jpeg;base64,' + base64.b64encode(jpeg_bytes).decode("utf-8")
    payload = {"deviceId": self.device_id, "message": message,
               "frame": encoded_frame, "activity": label,
               "bg_color": bg_color, "font_color": font_color, "alert_sound": alert_sound}
    self.mongo_logger.insert_attendance_event_to_mongo(payload)
def kalman_tracker(
        self,
        bboxs,
        img,
):
    """
    Update the Kalman tracks with the detections of the current frame.

    :param bboxs: detected boxes for this frame
    :param img: current frame, returned unchanged
    :return: img, list of [track id, centroid] for reportable tracks, and the
             boxes of those tracks
    """
    def _state_to_box(track):
        # The box values sit at the even indices of the state vector.
        state = track.x_state.T[0].tolist()
        return [state[0], state[2], state[4], state[6]]

    detections = bboxs
    track_boxes = [trk.box for trk in self.tracker_list]
    matched, unmatched_dets, unmatched_trks = self.assign_detections_to_trackers(track_boxes, detections,
                                                                                 iou_thrd=0.03)

    # Matched detections: correct the track with the measurement.
    if matched.size > 0:
        for trk_idx, det_idx in matched:
            measurement = np.expand_dims(detections[det_idx], axis=0).T
            track = self.tracker_list[trk_idx]
            track.kalman_filter(measurement)
            box = _state_to_box(track)
            track_boxes[trk_idx] = box
            track.box = box
            track.hits += 1

    # Unmatched detections: start a new track and take an id from the pool.
    for det_idx in unmatched_dets:
        measurement = np.expand_dims(detections[det_idx], axis=0).T
        track = Tracker()
        track.x_state = np.array([[measurement[0], 0, measurement[1], 0,
                                   measurement[2], 0, measurement[3], 0]]).T
        track.predict_only()
        box = _state_to_box(track)
        track.box = box
        track.id = self.track_id_list.popleft()
        self.tracker_list.append(track)
        track_boxes.append(box)

    # Unmatched tracks: count the miss and coast on the prediction.
    for trk_idx in unmatched_trks:
        track = self.tracker_list[trk_idx]
        track.no_losses += 1
        track.predict_only()
        box = _state_to_box(track)
        track.box = box
        track_boxes[trk_idx] = box

    # Report tracks that have enough hits and have not been lost for too long.
    objects = []
    boxs = []
    for track in self.tracker_list:
        if track.hits < self.min_hits or track.no_losses > self.max_age:
            continue
        box = track.box
        left, right, bottom = box[1], box[3], box[2]
        objects.append([int(track.id), [int(left + ((right - left) / 2)), bottom]])
        boxs.append(box)

    # Drop expired tracks and give their ids back to the pool.
    survivors = []
    for track in self.tracker_list:
        if track.no_losses > self.max_age:
            self.track_id_list.append(track.id)
        else:
            survivors.append(track)
    self.tracker_list = survivors

    return img, objects, boxs
@staticmethod
def assign_detections_to_trackers(
        trackers,
        detections,
        iou_thrd=0.3,
):
    """
    Pair current tracker boxes with new detections by their IOU.

    :param trackers: boxes of the existing tracks
    :param detections: boxes detected in the current frame
    :param iou_thrd: pairs whose IOU is below this value are treated as unmatched
    :return: matches as an (n, 2) array of [tracker index, detection index],
             unmatched detection indices, unmatched tracker indices
    """
    iou_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)
    for row, trk_box in enumerate(trackers):
        for col, det_box in enumerate(detections):
            iou_mat[row, col] = box_iou2(trk_box, det_box)

    # The IOU matrix is negated before assignment - presumably because
    # linear_assignment minimises cost; confirm against the solver in use.
    pairs = linear_assignment(-iou_mat)

    unmatched_trackers = [t for t in range(len(trackers)) if t not in pairs[:, 0]]
    unmatched_detections = [d for d in range(len(detections)) if d not in pairs[:, 1]]

    accepted = []
    for pair in pairs:
        if iou_mat[pair[0], pair[1]] < iou_thrd:
            # Assigned but overlapping too little: treat both sides as unmatched.
            unmatched_trackers.append(pair[0])
            unmatched_detections.append(pair[1])
        else:
            accepted.append(pair.reshape(1, 2))

    if accepted:
        matches = np.concatenate(accepted, axis=0)
    else:
        matches = np.empty((0, 2), dtype=int)
    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
def get_line_coordinates(self):
    """
    Get the line coordinates from the deployment JSON

    The metadata is cached in ``self.janus_metadata`` and fetched again with
    ``get_extra_fields`` when the cache has no usable entry.

    :return: (alignment, list of the values stored under each key of
             JanusDeploymentConstants.LINE_COORDINATES)
    """
    # Read the cache once and work on the local reference: indexing the
    # expiring cache again after the check could raise KeyError if the entry
    # expired in between.
    metadata = self.janus_metadata.get('metadata')
    if not metadata:
        metadata = get_extra_fields(self.device_id)
        self.janus_metadata['metadata'] = metadata
    _coordinates = [metadata.get(coordinate_key) for coordinate_key in
                    JanusDeploymentConstants.LINE_COORDINATES]
    _alignment = metadata.get(JanusDeploymentConstants.ALIGNMENT_KEY)
    return _alignment, _coordinates
def line_point_position(self, point):
    """
    Get the position of point w.r.t. the line

    :param point: (x, y) point to be compared
    :return: boolean side indicator - True when
             ``y - y1 - slope * (x - x1)`` is positive; for a perfectly
             vertical line, True when the point is to the right of it
    """
    _alignment, line_coordinates = self.get_line_coordinates()
    assert len(line_coordinates) == 4, "Line coordinates variable is invalid"
    assert len(point) == 2, "Point variable is invalid"
    x1, y1 = line_coordinates[0], line_coordinates[1]
    x2, y2 = line_coordinates[2], line_coordinates[3]
    dx = x2 - x1
    if dx == 0:
        # The slope is undefined here; dividing used to raise ZeroDivisionError.
        return bool(point[0] > x1)
    # Same test as the slope form, multiplied through by dx to avoid the
    # division; the sign of the product flips when dx is negative.
    scaled = (point[1] - y1) * dx - (y2 - y1) * (point[0] - x1)
    return bool(scaled > 0) if dx > 0 else bool(scaled < 0)
def validate_point_position(self, point):
    """
    Check that the point falls strictly within the span of the counting line.

    For a vertical alignment the y value of the point is compared with the
    y values of the line; otherwise the x values are compared.

    :param point: centroid
    :return: bool
    """
    _alignment, line_coordinates = self.get_line_coordinates()
    assert _alignment in [JanusDeploymentConstants.VERTICAL, JanusDeploymentConstants.HORIZONTAL], \
        "Invalid alignment variable"
    if _alignment == JanusDeploymentConstants.VERTICAL:
        start, end, value = line_coordinates[1], line_coordinates[3], point[1]
    else:
        start, end, value = line_coordinates[0], line_coordinates[2], point[0]
    # The line ends may be given in either order.
    return bool(start < value < end or end < value < start)
def update_bag_count(self, frame, detection_objects, class_name, detections):
    """
    Maintains the bag counts

    An object is remembered while it is on the initial side of the line and
    counted the first time it is seen on the other side. For every counted
    bag an event is sent that also states whether a print was found.

    :param frame: image
    :param detection_objects: sequence of [object id, centroid] pairs
    :param class_name: class names detected in the current frame; the first
                       of acc_gold / acc_suraksha_plus / ambuja_whitecem it
                       contains decides which counter is incremented
    :param detections: raw detections, passed on to ``distances``
    :return: frame with the centroid markers drawn on it
    """
    # (name looked up in class_name, counter attribute, label used in the event)
    bag_types = (
        ("acc_gold", "count_gold", "ACC GOLD"),
        ("acc_suraksha_plus", "count_suraksha", "ACC SURAKSHA PLUS"),
        ("ambuja_whitecem", "count_whitecem", "PPC White"),
    )
    # zip is kept so that no more objects than detections are processed.
    for (tracked, _det) in zip(detection_objects, detections):
        object_id, centroid = tracked[0], tracked[1]
        logger.debug(detections)
        frame = draw_circles_on_frame(frame, centroid, radius=10, color=(0, 0, 255),
                                      thickness=-1)
        if not self.validate_point_position(centroid):
            continue
        logger.debug("centroid detected")
        if not isinstance(self.initial_object_position, bool):
            logger.debug("Initializing the initial object position")
            self.initial_object_position = True
            logger.debug(self.initial_object_position)
        _point_position = self.line_point_position(point=centroid)
        logger.debug(self.uncounted_objects)

        if _point_position == self.initial_object_position:
            # Still on the initial side: remember the object until it crosses.
            logger.debug("same side only")
            if object_id not in self.uncounted_objects:
                self.uncounted_objects[object_id] = centroid
        elif object_id in self.uncounted_objects:
            # Crossed the line: count it once and report it.
            self.uncounted_objects.pop(object_id, None)
            for class_key, counter_attr, label in bag_types:
                if class_key not in class_name:
                    continue
                count = getattr(self, counter_attr) + 1
                setattr(self, counter_attr, count)
                logger.debug(count)
                print_found = bool(self.distances(detections))
                print_state = "Detected" if print_found else "Missing"
                self.send_payload(resize_to_64_64(frame=frame),
                                  message=f"{label} Bag Detected: Print {print_state}!")
                logger.info(f"Count: {count}, Print Found: {print_found}")
                break
            # centroid is an integer-indexed pair here, so it is used directly.
            # The former centroid['centroid'] / centroid['has_print'] lookups
            # raised TypeError, and the print status is already reported in
            # the event sent above.
            frame = draw_circles_on_frame(frame, centroid, radius=10, color=(0, 255, 0),
                                          thickness=-1)
        else:
            frame = draw_circles_on_frame(frame, centroid, radius=10, color=(0, 255, 0),
                                          thickness=-1)
    return frame
def draw_line_over_image(self, frame, color=(255, 255, 255)):
    """
    Overlay the counting line on a frame.

    :param frame: image handed to ``self.drawline``
    :param color: colour forwarded to ``self.drawline``
    :return: the ``frame`` object that was passed in
    """
    _alignment, line_coordinates = self.get_line_coordinates()
    assert len(line_coordinates) == 4, "Line coordinates variable is invalid"
    # The four values are used as (start_x, start_y, end_x, end_y).
    start_point = (line_coordinates[0], line_coordinates[1])
    end_point = (line_coordinates[2], line_coordinates[3])
    self.drawline(frame, start_point, end_point, color, thickness=3)
    return frame
@staticmethod
def drawline(img, pt1, pt2, color, thickness=1, style='dotted', gap=20):
dist = ((pt1[0] - pt2[0]) ** 2 + (pt1[1] - pt2[1]) ** 2) ** .5
pts = []
for i in np.arange(0, dist, gap):
r = i / dist
x = int((pt1[0] * (1 - r) + pt2[0] * r) + .5)
y = int((pt1[1] * (1 - r) + pt2[1] * r) + .5)
p = (x, y)
pts.append(p)
if style == 'dotted':
for p in pts:
cv2.circle(img, p, thickness, color, -1)
else:
s = pts[0]
e = pts[0]
i = 0
for p in pts:
s = e
e = p
if i % 2 == 1:
cv2.line(img, s, e, color, thickness)
i += 1
def distances(self, detections):
    """
    Tell whether any MRP detection starts inside any bag detection.

    Detections whose ``"class"`` is ``"mrp"`` are treated as prints, all
    others as bags. A print matches a bag when the first two values of its
    ``"points"`` lie strictly between the bag's values 0/2 and 1/3
    respectively (presumably ``[x1, y1, x2, y2]`` boxes - confirm against
    the detector output).

    Every bag/print pair is checked; previously only the first pair decided
    the result and ``None`` was returned when either list was empty.

    :param detections: iterable of dicts with ``"class"`` and ``"points"``
    :return: True when at least one print lies inside a bag, else False
    """
    mrp_cord = []
    cem_bag_cord = []
    for det in detections:
        target = mrp_cord if det["class"] == "mrp" else cem_bag_cord
        target.append(det["points"])

    for c_cord in cem_bag_cord:
        for m_cord in mrp_cord:
            if c_cord[0] < m_cord[0] < c_cord[2] and c_cord[1] < m_cord[1] < c_cord[3]:
                logger.debug("print is detected")
                return True
    return False
def inference(
        self,
        frame,
        classes,
):
    """
    Run the detector on a frame and keep detections of the requested classes.

    :param frame: image handed to ``self.yp.predict``; returned unchanged
    :param classes: container of class names to keep
    :return: ``(bboxs, frame, dets, class_name)`` where ``dets`` is the raw
        detector output, ``class_name`` lists the kept classes and ``bboxs``
        holds, for each kept detection, its ``"points"`` reordered as
        ``[p[1], p[0], p[3], p[2]]``
    """
    dets = self.yp.predict(frame)
    kept = [det for det in dets if det["class"] in classes] if dets else []
    class_name = [det["class"] for det in kept]
    bboxs = [
        [det["points"][1], det["points"][0], det["points"][3], det["points"][2]]
        for det in kept
    ]
    return bboxs, frame, dets, class_name
def _predict(self, obj):
    """
    Detect, track and count bags on one frame.

    The frame in ``obj['frame']`` is run through ``self.inference``, the
    counting line is drawn, boxes are tracked with ``self.kalman_tracker``
    and ``self.update_bag_count`` is applied. The annotated frame, resized to
    the configured ``FRAME_WIDTH`` x ``FRAME_HEIGHT``, is written back to
    ``obj['frame']``.

    When ``self.frame_skip`` is set, inference runs on every other frame and
    the previous result is replayed in between. The cached value is the
    ``(boxes, raw detections, class names)`` triple; previously only raw
    detector output was cached, which left ``class_name`` and ``_dets``
    unbound and passed unconverted detections to the tracker.

    :param obj: payload dict with a ``'frame'`` entry
    :return: the same ``obj`` with ``'frame'`` replaced; on any error the
        original frame is resized and returned instead
    """
    class_list = ["acc_gold", "acc_suraksha_plus", "ambuja_whitecem"]
    try:
        frame = obj['frame']
        if self.frame_skip and self.frame_skipping["skip_current_frame"]:
            # Replay the result cached on the previous frame.
            dets, _dets, class_name = self.frame_skipping["detection_value"]
            self.frame_skipping["skip_current_frame"] = False
        else:
            dets, frame, _dets, class_name = self.inference(frame, class_list)
            if self.frame_skip:
                self.frame_skipping["detection_value"] = (dets, _dets, class_name)
                self.frame_skipping["skip_current_frame"] = True

        frame = self.draw_line_over_image(frame)
        frame, objects, boxs = self.kalman_tracker(dets, frame)
        logger.debug("kalman objects --> {}".format(objects))
        logger.debug("kalman boxes --> {}".format(boxs))
        for box in boxs:
            # Tracker boxes are indexed (box[1], box[0]) / (box[3], box[2]) for cv2.
            cv2.rectangle(frame, (box[1], box[0]), (box[3], box[2]), (255, 0, 0), 2)

        frame = self.update_bag_count(frame=frame, detection_objects=objects,
                                      class_name=class_name, detections=_dets)
        logger.debug("self.uncounted_objects --> {}".format(self.uncounted_objects))
        obj['frame'] = cv2.resize(frame, (self.config.get('FRAME_WIDTH'),
                                          self.config.get('FRAME_HEIGHT')))
    except Exception as e:
        # Best effort: keep the stream alive with the unannotated frame.
        logger.exception(f"Error: {e}", exc_info=True)
        obj['frame'] = cv2.resize(obj['frame'], (self.config.get('FRAME_WIDTH'),
                                                 self.config.get('FRAME_HEIGHT')))
    return obj
import cv2
import json
import base64
import numpy as np
from scipy.spatial import distance
from expiringdict import ExpiringDict
import time
from edge_engine.common.logsetup import logger
from scripts.utils.infocenter import MongoLogger
# from yolov5processor.infer import ExecuteInference
from scripts.utils.edge_utils import get_extra_fields
from edge_engine.ai.model.modelwraper import ModelWrapper
from scripts.utils.centroidtracker import CentroidTracker
from scripts.common.constants import JanusDeploymentConstants
from scripts.utils.image_utils import draw_circles_on_frame, resize_to_64_64
from pymongo import MongoClient
from scripts.common.config import MONGO_URI
from uuid import uuid4
import cv2
import base64
import datetime
import numpy as np
import imutils
from collections import deque
from expiringdict import ExpiringDict
from sklearn.utils.linear_assignment_ import linear_assignment
from edge_engine.common.logsetup import logger
from edge_engine.ai.model.modelwraper import ModelWrapper
from scripts.utils.tracker import Tracker
from scripts.utils.helpers import box_iou2
from scripts.utils.edge_utils import Utilities
from scripts.utils.infocenter import MongoLogger
from scripts.utils.model_tracker import ModelCountTracker
from scripts.common.constants import JanusDeploymentConstants
# TRT Additions start
# from yolov5processor.infer import ExecuteInference
# from scripts.utils.yolov5_trt import YoloV5TRT
# TRT Additions stop
from scripts.utils.relay_util import RelayHandler
from paddleocr import PaddleOCR
class CementBagCounter(ModelWrapper):
def __init__(self, config, model_config, pubs, device_id):
    """
    Set up configuration, trackers, counters and Mongo handles.

    :param config: mapping whose ``"config"`` entry holds the runtime settings
    :param model_config: mapping read for ``uncounted_obj_length`` and
        ``uncounted_obj_age``
    :param pubs: object exposing ``rtp_write``
    :param device_id: device identifier, also used to look up camera details
    """
    super().__init__()

    # Runtime configuration and output channels.
    self.config = config["config"]
    self.device_id = device_id
    self.rtp = pubs.rtp_write
    self.mongo_logger = MongoLogger()
    self.frame_skip = self.config.get('frame_skip', False)

    # NOTE(review): the detector loaders (ExecuteInference / YoloV5TRT) are
    # disabled in this version; the engine path is only kept as a local.
    engine_file_path = "data/acc_v14.engine"
    self.classes = {0: 'ambuja_plus', 1: 'acc_gold', 2: 'acc_suraksha_power_plus', 3: 'ambuja_buildcem',
                    4: 'mrp', 5: 'acc_suraksha_power', 6: 'acc_nfr', 7: 'acc_concrete_plus'}

    # Centroid trackers.
    self.ct1 = CentroidTracker(maxDisappeared=5)
    self.ct2 = CentroidTracker(maxDisappeared=5)

    # Counters, all starting at zero.
    for counter_name in ("count", "cement_bag", "count_suraksha", "count_whitecem", "count_gold",
                         "mrp_counter", "count_nfr", "count_suraksha_power", "count_concrete_plus",
                         "count_ambuja_plus"):
        setattr(self, counter_name, 0)

    # Kalman tracking state.
    self.tracker_list = []
    self.max_age = 3
    self.min_hits = 0
    self.track_id_list = deque(str(track_id) for track_id in range(1, 50))
    self.prev_annotation = []
    self.initial_object_position = None

    # Expiring caches.
    self.uncounted_objects = ExpiringDict(max_len=model_config.get("uncounted_obj_length", 50),
                                          max_age_seconds=model_config.get("uncounted_obj_age", 60))
    self.janus_metadata = ExpiringDict(max_age_seconds=120, max_len=1)

    # Mongo handles.
    self.mongo_alarm_coll = MongoClient(MONGO_URI)['ilens_events']['triggered_alarms']
    self.camera_details = self.mongo_logger.get_camera_details(self.device_id)

    # Same ratio for every bag class.
    self.black_white_ratio_dict = dict.fromkeys(
        ['ambuja_plus', 'acc_gold', 'acc_suraksha_power_plus', 'ambuja_buildcem',
         'acc_suraksha_power', 'acc_nfr', 'acc_concrete_plus'],
        1.2,
    )
def paddle_ocr_load_model(self):
    """
    Build a PaddleOCR instance from the bundled model directories.

    :return: a ``PaddleOCR`` object configured for English with the angle
        classifier enabled
    """
    ocr_options = {
        "lang": "en",
        "use_angle_cls": True,
        "cls_model_dir": "paddleocr/model/ch_ppocr_mobile_v2.0_cls_infer",
        "rec_model_dir": "paddleocr/model/ch_PP-OCRv3_rec_infer",
        "det_model_dir": "paddleocr/model/en_PP-OCRv3_det_infer",
    }
    return PaddleOCR(**ocr_options)
def paddle_ocr_prediction(self, img_path, ocr):
    """
    Run OCR on one image and return the recognised strings.

    :param img_path: image argument forwarded to ``ocr.ocr``
    :param ocr: object exposing ``ocr(img, cls=..., det=..., rec=...)``
    :return: list with the text of every recognised line; an empty list when
        nothing was recognised
    """
    pages = ocr.ocr(img_path, cls=False, det=True, rec=True)
    # Only the first page is used. Some PaddleOCR releases report "no text"
    # as [None], which used to raise TypeError in the comprehension below.
    result = pages[0] if pages else None
    if not result:
        return []
    return [line[1][0] for line in result]
def check_character(self, character):
    """
    Accept a string only when it has at least 17 characters.

    :param character: value whose length is checked
    :return: ``character`` itself when ``len(character) >= 17``, else False
    """
    return character if len(character) >= 17 else False
def fixed_character(self, character):
    """
    Check the fixed layout of a code string.

    The string must start with ``fixed`` followed by ``internal``; the next
    three characters must be a member of ``year_list``, equal to
    ``plant_name`` and a member of ``month_list`` respectively.

    NOTE(review): ``fixed``, ``internal``, ``year_list``, ``plant_name`` and
    ``month_list`` are not defined in the visible part of this file - confirm
    they exist at module level, otherwise this raises NameError.

    :param character: string to validate
    :return: ``character`` when every check passes, else False
    """
    fixed_end = len(fixed)
    internal_end = fixed_end + len(internal)
    # Checks short-circuit in the same order as the original guard clauses.
    layout_ok = (
        character[internal_end] in year_list
        and fixed == character[0:fixed_end]
        and internal == character[fixed_end:internal_end]
        and plant_name == character[internal_end + 1]
        and character[internal_end + 2] in month_list
    )
    return character if layout_ok else False
def craft_character_find(self):
    """
    Detect text regions in an image with CRAFT and export them.

    NOTE(review): this method cannot run as written. ``image`` is read on
    the first line before it is ever assigned, ``output_dir`` is never
    defined here, and ``read_image``, ``load_refinenet_model``,
    ``load_craftnet_model``, ``get_prediction``, ``export_detected_regions``,
    ``export_extra_results`` and ``empty_cuda_cache`` are not imported in the
    visible part of this file. Presumably ``image`` and ``output_dir`` were
    meant to be parameters - confirm before use.

    :return: None
    """
    # read image
    image = read_image(image)
    # load models
    refine_net = load_refinenet_model(cuda=False)
    craft_net = load_craftnet_model(cuda=False)
    # perform prediction
    prediction_result = get_prediction(
        image=image,
        craft_net=craft_net,
        refine_net=refine_net,
        text_threshold=0.7,
        link_threshold=9999999999999999999,
        low_text=0.4,
        cuda=False,
        long_size=1280
    )
    # export detected text regions
    exported_file_paths = export_detected_regions(
        image=image,
        regions=prediction_result["boxes"],
        output_dir=output_dir,
        rectify=True
    )
    # export heatmap, detection points, box visualization
    export_extra_results(
        image=image,
        regions=prediction_result["boxes"],
        heatmaps=prediction_result["heatmaps"],
        output_dir=output_dir
    )
    # unload models from gpu
    empty_cuda_cache()
    print("find")
def classify_last_eight_chacter(self):
    """Placeholder step: currently only prints ``"8"``."""
    print("8")
def verify_mont_year_plant(self):
    """Placeholder step: currently only prints ``"month"``."""
    print("month")
def compare_to_get_final_output(self):
    """Placeholder step: currently only prints ``"output"``."""
    print("output")
def final_output(self):
    """Placeholder step: prints the fixed string ``"success"``."""
    print("success")
def _pre_process(self, x):
"""
Do preprocessing here, if any
:param x: payload
:return: payload
"""
return x
def _post_process(self, x):
"""
Apply post processing here, if any
:param x: payload
:return: payload
"""
self.rtp.publish(x) # video stream
return x
def send_payload(self, frame, bag_type='', label='CementBagDetected', bg_color="#474520", font_color="#FFFF00",
                 alert_sound=None,
                 message="Cement Bag Detected!", mrp_frmae='', mrp_roi='', mrp=''):
    """
    Insert a detection event into Mongo.

    The frame is JPEG-encoded and embedded as a base64 data URI.

    :param frame: image to encode with ``cv2.imencode``
    :param bag_type: stored under ``"bag_type"``
    :param label: stored under ``"activity"``
    :param bg_color: stored under ``"bg_color"``
    :param font_color: stored under ``"font_color"``
    :param alert_sound: stored under ``"alert_sound"``
    :param message: stored under ``"message"``
    :param mrp_frmae: stored under ``"mrp_frmae"`` (spelling kept for callers
        and stored documents)
    :param mrp_roi: stored under ``"mrp_roi"``
    :param mrp: stored under ``"mrp"``
    :return: None
    """
    jpeg_buffer = cv2.imencode('.jpg', frame)[1]
    # ndarray.tostring() is deprecated and removed in recent NumPy releases;
    # tobytes() returns the same bytes.
    encoded_frame = base64.b64encode(jpeg_buffer.tobytes()).decode("utf-8")
    payload = {"deviceId": self.device_id, "message": message,
               "frame": 'data:image/jpeg;base64,' + encoded_frame, "activity": label,
               "bg_color": bg_color, "font_color": font_color, "alert_sound": alert_sound,
               "bag_type": bag_type,
               "mrp_frmae": mrp_frmae, "mrp_roi": mrp_roi, "mrp": mrp}
    self.mongo_logger.insert_attendance_event_to_mongo(payload)
def kalman_tracker(
        self,
        bboxs,
        img,
):
    """
    Update the Kalman trackers with the detections of one frame.

    Detections are matched to existing trackers by IoU. Matched trackers are
    corrected with their detection, every unmatched detection starts a new
    tracker, and unmatched trackers are advanced by prediction only. Trackers
    whose ``no_losses`` exceeds ``self.max_age`` are dropped and their IDs are
    returned to ``self.track_id_list``.

    :param bboxs: detection boxes, each indexable at positions 0..3; tracker
        boxes are read as ``[top, left, bottom, right]`` below
    :param img: current frame, returned unchanged
    :return: ``(img, objects, boxs)`` where ``objects`` holds
        ``[track_id, [centre_x, bottom]]`` for every reported tracker and
        ``boxs`` the corresponding tracker boxes
    """
    z_box = bboxs
    x_box = [trk.box for trk in self.tracker_list]
    matched, unmatched_dets, unmatched_trks = self.assign_detections_to_trackers(
        x_box, z_box, iou_thrd=0.03
    )

    # Matched detections: correct the tracker with the measurement.
    for trk_idx, det_idx in matched:
        z = np.expand_dims(z_box[det_idx], axis=0).T
        tmp_trk = self.tracker_list[trk_idx]
        tmp_trk.kalman_filter(z)
        xx = tmp_trk.x_state.T[0].tolist()
        xx = [xx[0], xx[2], xx[4], xx[6]]
        x_box[trk_idx] = xx
        tmp_trk.box = xx
        tmp_trk.hits += 1

    # Unmatched detections: start a new tracker.
    for idx in unmatched_dets:
        # Flatten to plain scalars first. Building the state from size-1
        # arrays mixed with scalar zeros is a ragged array, which raises
        # ValueError on NumPy >= 1.24.
        z = np.asarray(z_box[idx]).ravel()
        tmp_trk = Tracker()
        # The state interleaves each box value with a second component
        # initialised to 0 (presumably its velocity - confirm in Tracker).
        tmp_trk.x_state = np.array([[z[0], 0, z[1], 0, z[2], 0, z[3], 0]]).T
        tmp_trk.predict_only()
        xx = tmp_trk.x_state.T[0].tolist()
        xx = [xx[0], xx[2], xx[4], xx[6]]
        tmp_trk.box = xx
        tmp_trk.id = self.track_id_list.popleft()  # assign an ID for the tracker
        self.tracker_list.append(tmp_trk)
        x_box.append(xx)

    # Unmatched tracks: no measurement this frame, predict only.
    for trk_idx in unmatched_trks:
        tmp_trk = self.tracker_list[trk_idx]
        tmp_trk.no_losses += 1
        tmp_trk.predict_only()
        xx = tmp_trk.x_state.T[0].tolist()
        xx = [xx[0], xx[2], xx[4], xx[6]]
        tmp_trk.box = xx
        x_box[trk_idx] = xx

    # Tracks that are reported to the caller.
    objects = []
    boxs = []
    for trk in self.tracker_list:
        if (trk.hits >= self.min_hits) and (trk.no_losses <= self.max_age):
            x_cv2 = trk.box
            left, right, bottom = x_cv2[1], x_cv2[3], x_cv2[2]
            centroid = [int(left + ((right - left) / 2)), bottom]
            objects.append([int(trk.id), centroid])
            boxs.append(x_cv2)

    # Drop stale tracks and recycle their IDs in a single pass.
    surviving = []
    for trk in self.tracker_list:
        if trk.no_losses > self.max_age:
            self.track_id_list.append(trk.id)
        else:
            surviving.append(trk)
    self.tracker_list = surviving
    return img, objects, boxs
@staticmethod
def assign_detections_to_trackers(
trackers,
detections,
iou_thrd=0.3,
):
"""
From current list of trackers and new detections, output matched detections,
un matched trackers, unmatched detections.
"""
iou_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)
for t, trk in enumerate(trackers):
for d, det in enumerate(detections):
iou_mat[t, d] = box_iou2(trk, det)
matched_idx = linear_assignment(-iou_mat)
unmatched_trackers, unmatched_detections = [], []
for t, trk in enumerate(trackers):
if t not in matched_idx[:, 0]:
unmatched_trackers.append(t)
for d, det in enumerate(detections):
if d not in matched_idx[:, 1]:
unmatched_detections.append(d)
matches = []
for m in matched_idx:
if iou_mat[m[0], m[1]] < iou_thrd:
unmatched_trackers.append(m[0])
unmatched_detections.append(m[1])
else:
matches.append(m.reshape(1, 2))
if len(matches) == 0:
matches = np.empty((0, 2), dtype=int)
else:
matches = np.concatenate(matches, axis=0)
return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
def get_line_coordinates(self):
    """
    Get the line coordinates from the deployment JSON.

    The metadata is cached in ``self.janus_metadata`` and fetched again with
    ``get_extra_fields`` when the cache has no usable entry.

    :return: ``(_alignment, _coordinates)`` where ``_coordinates`` holds one
        value per key in ``JanusDeploymentConstants.LINE_COORDINATES``
    """
    # Keep a local reference: the cache entry can expire between the check
    # and later lookups, which used to surface as a KeyError.
    metadata = self.janus_metadata.get('metadata')
    if not metadata:
        metadata = get_extra_fields(self.device_id)
        self.janus_metadata['metadata'] = metadata
    _coordinates = [metadata.get(coordinate_key) for coordinate_key in
                    JanusDeploymentConstants.LINE_COORDINATES]
    _alignment = metadata.get(JanusDeploymentConstants.ALIGNMENT_KEY)
    return _alignment, _coordinates
def line_point_position(self, point):
    """
    Get the position of a point w.r.t. the counting line.

    The result is True when ``point[1] - y1 - slope * (point[0] - x1)`` is
    positive. It is evaluated in cross-product form so that an exactly
    vertical line (``x1 == x2``) no longer raises ZeroDivisionError.

    :param point: ``(x, y)`` point to be compared
    :return: bool, the side of the line the point lies on; False for points
        on the line
    """
    _alignment, line_coordinates = self.get_line_coordinates()
    assert len(line_coordinates) == 4, "Line coordinates variable is invalid"
    assert len(point) == 2, "Point variable is invalid"
    x1, y1, x2, y2 = (line_coordinates[0], line_coordinates[1],
                      line_coordinates[2], line_coordinates[3])
    delta_x = x2 - x1
    delta_y = y2 - y1
    # Equals the slope-based expression multiplied by delta_x.
    side_value = (point[1] - y1) * delta_x - delta_y * (point[0] - x1)
    if delta_x < 0:
        # Multiplying by a negative delta_x flipped the sign; undo it.
        side_value = -side_value
    return bool(side_value > 0)
def insert_alarm_event(self, message="MRP Missing", asset_hierarchy=""):
    """
    Insert a triggered-alarm document into ``self.mongo_alarm_coll``.

    :param message: stored as ``alarmName`` and ``alarm_condition``
    :param asset_hierarchy: stored as the device/tag identifier throughout
        the document
    :return: None
    """
    # Timestamps and the event id are produced first and placed directly in
    # the document.
    epoch = int(time.time()) * 1000
    time_string = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    event_id = f"alarm_event_{str(uuid4()).split('-')[0]}"

    notification_profile = {
        "usersOrUserGroup": [
            {
                "value": "access_group_100",
                "type": "access_group",
                "label": "ACC Admin"
            }
        ],
        "notificationProfile": [
            "alarm_notify_type_4"
        ],
        "emailIds": [],
        "phoneNumbers": [],
        "notificationTone": "",
        "isNotificationToneShow": True,
        "triggers": [
            {
                "device_instance_id": None,
                "tags": None,
                "customValueType": None,
                "customValue": None,
                "counter": 1
            }
        ],
        "counter": 1,
        "enable_custom": True
    }

    data = {
        "device_instance_id": asset_hierarchy,
        "device_instance_ids": asset_hierarchy,
        "alarm_id": "alarm_configuration_212",
        "triggered_devices": [asset_hierarchy],
        "tag_value": 5.0,
        "start_time": time_string,
        "end_time": "",
        "current_level": 0,
        "id": event_id,
        "trigger_time": [
            {
                "start_time": epoch,
                "counter": 0
            }
        ],
        "trigger_levels": [
            {
                "timestamp": time_string,
                "notificaton_profile": [notification_profile]
            }
        ],
        "priority": "alarm_priority_type_109",
        "tag_id": "",
        "template": "MRP Missing",
        "acknowledge": True,
        "alarmName": message,
        "alarmType": "Alarm",
        "created_by": "user_100",
        "project_id": "project_101",
        "product_encrypted": False,
        "start_time_in_epoch": epoch,
        "show_data_viz": True,
        "alarm_condition": message,
        "tag_id_list": [asset_hierarchy],
        "tag_value_json": {asset_hierarchy: 5.0},
        "alarm_tag_list": [asset_hierarchy],
        "acknowledged_at": time_string,
        "acknowledged_by": None
    }
    self.mongo_alarm_coll.insert_one(data)
def validate_point_position(self, point):
    """
    Check that a point lies within the extent of the counting line.

    For a VERTICAL alignment the point's y value must lie strictly between
    the line's two y values; otherwise its x value must lie strictly between
    the line's two x values.

    :param point: centroid as ``(x, y)``
    :return: bool
    """
    _alignment, line_coordinates = self.get_line_coordinates()
    assert _alignment in [JanusDeploymentConstants.VERTICAL, JanusDeploymentConstants.HORIZONTAL], \
        "Invalid alignment variable"
    if _alignment == JanusDeploymentConstants.VERTICAL:
        value, first_end, second_end = point[1], line_coordinates[1], line_coordinates[3]
    else:
        value, first_end, second_end = point[0], line_coordinates[0], line_coordinates[2]
    # The line ends may come in either order.
    return bool(first_end < value < second_end or second_end < value < first_end)
# def verifying_cement_bag_type(self, previous_class, current_class):
# if(previous_class == current_class):
# print("bag changed")
def update_bag_count(self, frame, detection_objects, class_name, detections):
    """
    Maintains the bag counts.

    Each tracked object is marked on the frame. An object first seen on the
    initial side of the counting line is remembered in
    ``self.uncounted_objects``; when it is later seen on the other side it is
    removed from that cache, counted for its class and reported through
    ``self.send_payload``.

    NOTE(review): the source this was reconstructed from had lost its
    indentation. The trailing ``else`` is taken to belong to the same-side /
    crossed decision, and the ``acc_gold`` reclassification to the
    "not gold" branch - confirm against the original file.
    NOTE(review): tracked objects, raw detections and class names are paired
    positionally by ``zip``; nothing here guarantees they describe the same
    object.

    :param frame: image
    :param detection_objects: ``[object_id, centroid]`` pairs from the tracker
    :param class_name: detected class per object
    :param detections: raw detections, forwarded to the MRP and colour checks
    :return: the annotated frame
    """
    for tracked, _det, class_detected in zip(detection_objects, detections, class_name):
        object_id, centroid = tracked[0], tracked[1]
        logger.debug(detections)
        frame = draw_circles_on_frame(
            frame, centroid, radius=10, color=(0, 0, 255), thickness=-1
        )
        if not self.validate_point_position(centroid):
            continue

        logger.debug("centroid detected")
        if not isinstance(self.initial_object_position, bool):
            logger.debug("Initializing the initial object position")
            self.initial_object_position = True
        logger.debug(self.initial_object_position)
        _point_position = self.line_point_position(point=centroid)
        logger.debug(self.uncounted_objects)

        if _point_position == self.initial_object_position:
            # Still on the initial side: remember it until it crosses.
            logger.debug("same side only")
            if object_id not in self.uncounted_objects:
                self.uncounted_objects[object_id] = centroid
            continue

        if object_id in self.uncounted_objects:
            # First sighting on the far side: count it exactly once.
            self.uncounted_objects.pop(object_id, None)
            self._report_counted_bag(frame, detections, class_detected)
        frame = draw_circles_on_frame(
            frame, centroid, radius=10, color=(0, 255, 0), thickness=-1
        )
    return frame

def _report_counted_bag(self, frame, detections, class_detected):
    """Increment the counter of one crossed bag and publish its MRP event."""
    # class -> (counter attribute, label used in the event message)
    bag_profiles = {
        "acc_gold": ("count_gold", "ACC GOLD"),
        "acc_suraksha_power_plus": ("count_suraksha", "ACC SURAKSHA PP"),
        "ambuja_buildcem": ("count_whitecem", "AMBUJA BUILDCEM"),
        "acc_nfr": ("count_nfr", "ACC NFR"),
        "acc_suraksha_power": ("count_suraksha_power", "ACC SURAKSHA POWER"),
        "acc_concrete_plus": ("count_concrete_plus", "ACC CONCRETE PLUS"),
        "ambuja_plus": ("count_ambuja_plus", "Ambuja PLUS"),
    }
    if class_detected not in bag_profiles:
        return

    if class_detected == "acc_gold" and not self.yellow_thresholding(
        detections, frame, class_detected
    ):
        # The colour check overrides the detector: count and report the bag
        # as Suraksha Power Plus instead.
        class_detected = "acc_suraksha_power_plus"

    counter_attr, label = bag_profiles[class_detected]
    count = getattr(self, counter_attr) + 1
    setattr(self, counter_attr, count)
    logger.debug(count)

    mrp_result, mrp_frame, mrp_roi = self.distances(detections, frame, class_detected)
    if class_detected == "ambuja_plus":
        self.text = "COUNT : {ambuja_count}/10".format(ambuja_count=count)

    if mrp_result:
        self.send_payload(
            resize_to_64_64(frame=frame),
            bag_type=class_detected,
            message=f"{label}, MRP:YES",
            mrp_frmae=mrp_frame,
            mrp_roi=mrp_roi,
            mrp="PASS",
        )
    else:
        self.send_payload(
            resize_to_64_64(frame=frame),
            bag_type=class_detected,
            message=f"{label}, MRP:NO",
            mrp_frmae=mrp_frame,
            mrp_roi=mrp_roi,
        )
    logger.info(f"Count: {count}, Print Found: {bool(mrp_result)}")
def yellow_thresholding(self, detections, frame, class_detected):
    """
    Decide whether a wide bag shows a large region in the configured HSV range.

    Every non-``"mrp"`` detection whose box is wider than 500 is cropped
    (40 rows off the top, 80 columns off the right), thresholded in HSV
    between ``[10, 70, 0]`` and ``[45, 255, 255]``, and searched for an
    external contour whose bounding box is wider than 100.

    Crops that come out empty are skipped; previously they reached
    ``cv2.cvtColor`` and raised.

    :param detections: iterable of dicts with ``"class"`` and ``"points"``
    :param frame: image the bag regions are cropped from
    :param class_detected: unused; kept for interface compatibility
    :return: True when such a contour is found in any wide bag, else False
    """
    lower = np.array([10, 70, 0], dtype="uint8")
    upper = np.array([45, 255, 255], dtype="uint8")
    for det in detections:
        if det["class"] == "mrp":
            continue
        c_cord = det["points"]
        if c_cord[2] - c_cord[0] <= 500:
            continue
        roi = frame[c_cord[1] + 40: c_cord[3], c_cord[0]: c_cord[2] - 80]
        if roi.size == 0:
            # Box too short for the 40-row trim: nothing to inspect.
            continue
        image = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(image, lower, upper)
        cnts = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # findContours returns 2 or 3 values depending on the OpenCV version.
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]
        if any(cv2.boundingRect(c)[2] > 100 for c in cnts):
            return True
    return False
def draw_line_over_image(self, frame, color=(255, 255, 255)):
    """
    Draw the counting line onto a frame.

    :param frame: image handed to ``self.drawline``
    :param color: colour forwarded to ``self.drawline``
    :return: the ``frame`` object that was passed in
    """
    _alignment, line_coordinates = self.get_line_coordinates()
    assert len(line_coordinates) == 4, "Line coordinates variable is invalid"
    # Values 0/1 form the first end point, values 2/3 the second.
    line_start = (line_coordinates[0], line_coordinates[1])
    line_end = (line_coordinates[2], line_coordinates[3])
    self.drawline(frame, line_start, line_end, color, thickness=3)
    return frame
@staticmethod
def drawline(img, pt1, pt2, color, thickness=1, style="dotted", gap=20):
dist = ((pt1[0] - pt2[0]) ** 2 + (pt1[1] - pt2[1]) ** 2) ** 0.5
pts = []
for i in np.arange(0, dist, gap):
r = i / dist
x = int((pt1[0] * (1 - r) + pt2[0] * r) + 0.5)
y = int((pt1[1] * (1 - r) + pt2[1] * r) + 0.5)
p = (x, y)
pts.append(p)
if style == "dotted":
for p in pts:
cv2.circle(img, p, thickness, color, -1)
else:
s = pts[0]
e = pts[0]
i = 0
for p in pts:
s = e
e = p
if i % 2 == 1:
cv2.line(img, s, e, color, thickness)
i += 1
def crop_polygon(self, cement_bag_img):
    """
    Cut a fixed quadrilateral out of a bag image onto a white background.

    :param cement_bag_img: image the region is cropped from
    :return: crop of the quadrilateral's bounding rectangle in which pixels
        outside the quadrilateral are white
    """
    polygon = np.array([[100, 180], [530, 110], [555, 190], [100, 260]])

    # (1) Crop the bounding rectangle of the polygon.
    left, top, width, height = cv2.boundingRect(polygon)
    cropped = cement_bag_img[top: top + height, left: left + width].copy()

    # (2) Build the mask with the polygon shifted into crop coordinates.
    local_polygon = polygon - polygon.min(axis=0)
    mask = np.zeros(cropped.shape[:2], np.uint8)
    cv2.drawContours(mask, [local_polygon], -1, (255, 255, 255), -1, cv2.LINE_AA)

    # (3) Keep only the pixels inside the polygon.
    foreground = cv2.bitwise_and(cropped, cropped, mask=mask)

    # (4) White background everywhere except inside the polygon.
    background = np.ones_like(cropped, np.uint8) * 255
    cv2.bitwise_not(background, background, mask=mask)
    return background + foreground
def mrp_digit_count(self, img, detected_class):
    """
    Count characters in an MRP crop and compare with the expected number.

    The crop is pasted onto a white 300x800 canvas, thresholded, cleaned with
    morphological open/close and its contours are handed to
    ``self.find_chars``. The three supported bag types share one pipeline and
    differ only in the morphology kernel size, the contour approximation mode
    and the colour passed to ``drawContours``.

    :param img: BGR crop of the MRP region; must fit on the canvas at offset
        (20, 20)
    :param detected_class: bag class name
    :return: ``(count, ok)`` where ``count`` is the value returned by
        ``self.find_chars`` and ``ok`` is True when ``count`` is at least the
        expected number of characters minus 2. For a class without a
        configured expected count ``(0, False)`` is returned (previously this
        failed with an unbound-variable error).
    """
    digit_num = {"ambuja_plus": 17, "acc_gold": 13, "acc_suraksha_power_plus": 17}
    if detected_class not in digit_num:
        return 0, False

    # Per class: (morphology kernel size, contour approximation, contour colour)
    pipeline_params = {
        "ambuja_plus": (5, cv2.CHAIN_APPROX_SIMPLE, (0, 0, 255)),
        "acc_suraksha_power_plus": (7, cv2.CHAIN_APPROX_NONE, (0, 255, 0)),
        "acc_gold": (5, cv2.CHAIN_APPROX_NONE, (0, 255, 0)),
    }
    kernel_size, approx_mode, contour_color = pipeline_params[detected_class]

    # Paste the crop onto a white canvas with a 20 px margin.
    height, width = img.shape[:2]
    canvas = np.zeros((300, 800, 3), np.uint8)
    canvas[:, :] = (255, 255, 255)
    x_offset = y_offset = 20
    canvas[y_offset: y_offset + height, x_offset: x_offset + width] = img.copy()
    img = canvas

    # Keep only pixels whose HSV value is at most 220.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    value_channel = cv2.split(hsv)[2]
    _, dark_mask = cv2.threshold(value_channel, 220, 255, cv2.THRESH_BINARY_INV)
    image = cv2.bitwise_and(img, img, mask=dark_mask)

    # Resize, grayscale, blur and binarise with Otsu's threshold.
    image = imutils.resize(image, height=150)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    thresh = cv2.threshold(
        blurred, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
    )[1]

    kernel = cv2.getStructuringElement(
        cv2.MORPH_ELLIPSE, (kernel_size, kernel_size)
    )
    # NOTE(review): the 4th positional argument is morphologyEx's ``dst``
    # slot, not a kernel - presumably unintended; kept so results are
    # unchanged.
    thresh = cv2.morphologyEx(
        thresh, cv2.MORPH_OPEN, kernel, np.ones((5, 5), np.uint8), iterations=2
    )
    thresh = cv2.morphologyEx(
        thresh, cv2.MORPH_CLOSE, kernel, None, None, 1, cv2.BORDER_REFLECT101
    )

    # Find the contours, outline them on the binary image and count characters.
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_TREE, approx_mode)
    cnts = imutils.grab_contours(cnts)
    cv2.drawContours(thresh, cnts, -1, contour_color, 1)
    count = self.find_chars(cnts, thresh)

    return count, bool(count >= (digit_num[detected_class] - 2))
def distances(self, detections, frame, class_detected):
    """
    Check whether an MRP print is present on a wide-enough detected bag.

    Every detection whose class is not ``"mrp"`` is treated as a bag. For
    each bag wider than 500 px the bag crop is JPEG-encoded, and - when the
    deployment's ``mrp_detect`` extra field is ``"yes"`` - a sub-region of
    the lower half of the crop is passed to ``self.mrp_image``.

    * MRP found: the miss counter is reset and the method returns at once.
    * MRP not found: ``self.mrp_counter`` is incremented; on the 5th
      consecutive miss the counter is reset, an alarm event is inserted and
      the belt relay is asked to stop.

    :param detections: list of dicts with ``"class"`` and ``"points"``
        (``[x1, y1, x2, y2]``) entries
    :param frame: full frame the detections refer to
    :param class_detected: bag class name, forwarded to ``mrp_image`` and
        used in the alarm message
    :return: ``(found, add_mrp, mrp_roi)`` - ``found`` is True only when an
        MRP was detected; the other two are JPEG data-URI strings (empty
        strings when not produced)
    """

    def as_jpeg_data_uri(image):
        # tobytes() replaces the deprecated ndarray.tostring() alias.
        jpeg = cv2.imencode(".jpg", image)[1].tobytes()
        return "data:image/jpeg;base64," + base64.b64encode(jpeg).decode("utf-8")

    add_mrp = ""
    mrp_roi = ""
    bag_boxes = [det["points"] for det in detections if det["class"] != "mrp"]

    for box in bag_boxes:
        bag_width = box[2] - box[0]
        if not bag_width > 500:
            continue

        roi = frame[box[1]: box[3], box[0]: box[2]]
        h, w, _ = roi.shape
        # Lower half of the bag, then skip its first 50 rows and last 150
        # columns to get the area searched for the MRP print.
        lower_half = roi[int((h / 2) - 0): h, 0:w]
        mrp_search_area = lower_half[50: int((h / 2)), 0: w - 150]
        mrp_roi = as_jpeg_data_uri(roi)

        extra_values = get_extra_fields(self.device_id)
        mrp_detect = extra_values.get(JanusDeploymentConstants.MRP_DETECT_KEY)
        if mrp_detect is None or mrp_detect.lower() != "yes":
            continue

        mrp_add = self.mrp_image(mrp_search_area, class_detected)
        if mrp_add is not None:
            self.mrp_counter = 0
            add_mrp = as_jpeg_data_uri(mrp_add)
            return True, add_mrp, mrp_roi

        self.mrp_counter += 1
        if self.mrp_counter >= 5:
            self.mrp_counter = 0
            logger.debug("activate relay")
            self.insert_alarm_event(
                asset_hierarchy=self.camera_details.get("asset_hierarchy", ""),
                message=self.camera_details.get("asset_name", "")
                + " - "
                + class_detected
                + " : MRP missed",
            )
            RelayHandler().update_relay_status(
                self.camera_details.get("belt_relay_ep", ""),
                dict(triggerStatus="stop"),
            )
            logger.debug("Stopped the relay because of 5 consecutive MRP misses")

    return False, add_mrp, mrp_roi
def inference(
        self,
        frame,
        classes,
):
    """
    Run the detector on a frame and split out the detections of interest.

    :param frame: image passed to ``self.yolo_v5_wrapper.infer``
    :param classes: class names to keep
    :return: ``(bboxs, frame, dets, class_name)`` where ``dets`` holds every
        detection as ``{"points", "conf", "class"}``, ``class_name`` lists
        the classes of the kept detections and ``bboxs`` holds their boxes
        with each coordinate pair swapped
        (``[points[1], points[0], points[3], points[2]]``)
    """
    boxes, scores, class_ids = self.yolo_v5_wrapper.infer(frame)

    dets = []
    for points, conf, class_id in zip(boxes, scores, class_ids):
        dets.append(
            {"points": list(points), "conf": conf, "class": self.classes.get(class_id)}
        )

    kept = [det for det in dets if det["class"] in classes]
    class_name = [det["class"] for det in kept]
    bboxs = [
        [det["points"][1], det["points"][0], det["points"][3], det["points"][2]]
        for det in kept
    ]
    return bboxs, frame, dets, class_name
def _predict(self, obj):
    """
    Process one frame: detect bags, draw the counting line, track and count.

    On success ``obj['frame']`` is replaced by the annotated frame resized to
    the configured ``FRAME_WIDTH`` x ``FRAME_HEIGHT``. If any step raises,
    the error is logged and the frame currently stored in ``obj`` is resized
    instead.

    :param obj: dict carrying the input image under ``'frame'``
    :return: the same dict with ``'frame'`` updated
    """
    bag_classes = [
        "acc_gold",
        "acc_suraksha_power_plus",
        "ambuja_buildcem",
        "acc_nfr",
        "acc_suraksha_power",
        "acc_concrete_plus",
        "ambuja_plus",
    ]
    try:
        frame = obj['frame']
        tracker_boxes, frame, raw_dets, class_name = self.inference(frame, bag_classes)
        frame = self.draw_line_over_image(frame)
        frame, objects, _boxs = self.kalman_tracker(tracker_boxes, frame)
        frame = self.update_bag_count(
            frame=frame,
            detection_objects=objects,
            class_name=class_name,
            detections=raw_dets,
        )
        logger.debug("self.uncounted_objects --> {}".format(self.uncounted_objects))
        output_size = (self.config.get('FRAME_WIDTH'), self.config.get('FRAME_HEIGHT'))
        obj['frame'] = cv2.resize(frame, output_size)
    except Exception as e:
        logger.exception(f"Error: {e}", exc_info=True)
        fallback_size = (self.config.get('FRAME_WIDTH'), self.config.get('FRAME_HEIGHT'))
        obj['frame'] = cv2.resize(obj['frame'], fallback_size)
    return obj
def detect_mrp(self, img, class_detected):
    """
    Decide from the share of non-black pixels whether an MRP print is present.

    The image is viewed as a list of 3-channel pixels. A pixel is "black"
    when all three channels are 0. The score is
    ``(non_black + 100) / black * 100`` and is compared, after truncation to
    int, with the per-class threshold from ``self.black_white_ratio_dict``.

    :param img: 3-channel image (any shape that reshapes to ``(-1, 3)``)
    :param class_detected: key into ``self.black_white_ratio_dict``
    :return: True when the score reaches the threshold, else False. An image
        without any black pixel has an unbounded score and returns True
        (previously this raised ``ZeroDivisionError``).
    :raises KeyError: if no threshold is configured for ``class_detected``
    """
    pixels = img.reshape(-1, 3)
    # Vectorised count of pixels whose three channels are all zero.
    black_pixel = int(np.count_nonzero((pixels == 0).all(axis=1)))
    # The non-black count starts from 100, as in the original implementation.
    # NOTE(review): the reason for this offset is not visible here - confirm.
    white_pixel = 100 + (len(pixels) - black_pixel)

    black_white_ratio = self.black_white_ratio_dict[
        "{class_name}".format(class_name=class_detected)
    ]

    if black_pixel == 0:
        return True

    white_to_black = (white_pixel / black_pixel) * 100
    return int(white_to_black) >= int(black_white_ratio)
def mrp_image(self, img, class_detected):
    """
    Isolate the candidate MRP print in a bag crop.

    Each colour plane is background-normalised (dilate, median blur, inverted
    absolute difference), the merged result is sharpened, and pixels inside a
    fixed HSV range are kept. ``self.detect_mrp`` then decides whether the
    masked image contains an MRP.

    :param img: BGR crop to analyse
    :param class_detected: bag class name forwarded to ``detect_mrp``
    :return: the masked image when an MRP is detected; None when it is not,
        or when any step raises (the error is logged)
    """
    try:
        sharpen_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])

        # Flatten shadows: estimate each plane's background and subtract it.
        normalised_planes = []
        for plane in cv2.split(img.copy()):
            dilated = cv2.dilate(plane, np.ones((7, 7), np.uint8))
            background = cv2.medianBlur(dilated, 21)
            normalised_planes.append(255 - cv2.absdiff(plane, background))
        shadow_free = cv2.merge(normalised_planes)

        sharpened = cv2.filter2D(src=shadow_free, ddepth=-1, kernel=sharpen_kernel)

        # Keep only pixels inside the fixed HSV window.
        hsv = cv2.cvtColor(sharpened, cv2.COLOR_BGR2HSV)
        lower_val = np.array([0, 0, 160])
        upper_val = np.array([180, 230, 200])
        mask = cv2.inRange(hsv, lower_val, upper_val)
        res = cv2.bitwise_and(sharpened, sharpened, mask=mask)

        if self.detect_mrp(res, class_detected):
            return res
        return None
    except Exception as e:
        logger.exception(f"Error: {e}", exc_info=True)
        return None
\ No newline at end of file
import os
import sys
import json
from pymongo import MongoClient
# The service configuration is a JSON document in the ``config`` environment
# variable. The raw value is checked before parsing: json.loads(None) raises
# TypeError, which previously made the "not found" branch below unreachable
# when the variable was unset.
_RAW_CONFIG = os.environ.get('config')
MAIN_OS_VARIABLE = json.loads(_RAW_CONFIG) if _RAW_CONFIG is not None else None
if MAIN_OS_VARIABLE is None:
    sys.stderr.write("Configuration not found...")
    sys.stderr.write("Exiting....")
    sys.exit(1)

# Connection settings read from the parsed configuration.
MONGO_URI = MAIN_OS_VARIABLE.get('MONGO_URI')
MONGO_SERVICE_DB = MAIN_OS_VARIABLE.get('MONGO_DB')
MONGO_SERVICE_COLL = MAIN_OS_VARIABLE.get('MONGO_COLL')
PASS_KEY = MAIN_OS_VARIABLE.get('PASS_KEY')

# Database handle plus the two configuration documents stored in the service
# collection under the ids 'hostConfig' and 'appMongoConfig'.
MONGO_DB_OBJ = MongoClient(MONGO_URI)[MONGO_SERVICE_DB]
HOST_CONFIG = MONGO_DB_OBJ[MONGO_SERVICE_COLL].find_one({'configId': 'hostConfig'}).get('config')
APP_MONGO_COLLECTION = MONGO_DB_OBJ[MONGO_SERVICE_COLL].find_one({'configId': 'appMongoConfig'}).get('config')
class JanusDeploymentConstants:
    """Collection name and field/key names used with Janus deployment documents."""

    # Collection lookup key and document fields
    JANUS_DEPLOYMENT_COLLECTION = "janusDeploymentConfigurations"
    DEPLOYMENT_ID = "deploymentId"
    EXTRA_FIELDS_KEY = "extra_fields"
    DIRECTION_KEY = "direction"

    # Keys found among the extra fields
    LINE_COORDINATES = ["x1", "y1", "x2", "y2"]
    COUNT_BAGS_FLAG = "count_bags"
    ALIGNMENT_KEY = "alignment"
    MODEL_KEY = "model"
    MRP_DETECT_KEY = "mrp_detect"

    # Alignment values
    VERTICAL = "vertical"
    HORIZONTAL = "horizontal"
class CameraConstants:
    """Fixed values used for camera streaming, pipeline and deployment events."""

    # Video stream settings
    videortpmap = "VP8/90000"
    videopt = 96
    threads = 3
    gStreamer = False

    # Event metadata
    eventType = "deploy"
    created_by = "user_6501"
    event_status = "pending"

    # Deployment types
    deploymentTypeCreate = "upgrade_and_deploy"
    deploymentTypeRemove = "remove"
    deployment_key = "deploymentId"
    pipeline_deployment_type = "docker"
    docker_deployment_type = "single"

    # Pipeline description
    pipeline_internal = {}
    pipeline_category = "ai"
    thread = 1
    job_id = "pipeline_129"

    # Container commands
    command_eventtype = "command"
    command_type = "docker"
    restart_command = "restart_container"
    stop_command = "stop_container"
    start_command = "start_container"
from edge_engine.ai.model.modelwraper import ModelWrapper
import cv2
import base64
class LoopBackModel(ModelWrapper):
    """
    Pass-through model: performs no inference and publishes each frame.

    Pre-processing and prediction return their input unchanged; the
    post-processing step JPEG-encodes the frame, stores it back as a base64
    data URI and publishes the payload through ``pubs.mqtt_pub``.
    """

    def __init__(self, pubs, path=None, ):
        """
        :param pubs: object exposing the publisher as ``mqtt_pub``
        :param path: forwarded to ``ModelWrapper.__init__``
        """
        super().__init__(path)
        self.mqtt = pubs.mqtt_pub

    def _pre_process(self, x):
        """Return the input unchanged."""
        return x

    def _post_process(self, x):
        """
        Replace ``x['frame']`` with a JPEG data URI and publish ``x``.

        :param x: dict holding the image under ``'frame'``
        :return: the same dict after publishing
        """
        # tobytes() replaces the deprecated ndarray.tostring() alias.
        image = cv2.imencode('.jpg', x['frame'])[1].tobytes()
        image = 'data:image/jpeg;base64,' + base64.b64encode(image).decode("utf-8")
        x['frame'] = image
        self.mqtt.publish(x)
        return x

    def _predict(self, x):
        """Return the input unchanged."""
        return x

    def predict(self, x):
        """Delegate to ``ModelWrapper.predict``."""
        return super().predict(x)
# import the necessary packages
from scipy.spatial import distance as dist
from collections import OrderedDict
import numpy as np
from edge_engine.common.logsetup import logger
class CentroidTracker():
    """
    Track objects across frames by nearest-centroid matching.

    ``objects`` maps an object id to ``{'has_print': bool, 'centroid': ...}``
    and ``disappeared`` maps the same id to the number of consecutive updates
    in which the object was not matched. An object is dropped once that
    number exceeds ``maxDisappeared``.
    """

    def __init__(self, maxDisappeared=50):
        """
        :param maxDisappeared: number of consecutive unmatched updates an
            object may have before it is deregistered
        """
        self.nextObjectID = 0
        self.objects = OrderedDict()
        self.disappeared = OrderedDict()
        self.maxDisappeared = maxDisappeared

    def register(self, centroid):
        """Start tracking a new object at ``centroid`` under the next free id."""
        self.objects[self.nextObjectID] = {'has_print': False, 'centroid': centroid}
        self.disappeared[self.nextObjectID] = 0
        self.nextObjectID += 1

    def deregister(self, objectID):
        """Stop tracking ``objectID``."""
        del self.objects[objectID]
        del self.disappeared[objectID]

    def _mark_disappeared(self, objectID):
        """Count one more unmatched update for ``objectID``; drop it when over the limit."""
        self.disappeared[objectID] += 1
        if self.disappeared[objectID] > self.maxDisappeared:
            self.deregister(objectID)

    def update(self, rects):
        """
        Match the current frame's bounding boxes against the tracked objects.

        :param rects: iterable of ``(startX, startY, endX, endY)`` boxes
        :return: the ``objects`` mapping after the update
        :raises Exception: whatever ``scipy`` raises if the distance matrix
            cannot be computed; the inputs are logged first. (Previously the
            error was only logged and execution then failed on an undefined
            distance matrix.)
        """
        # No detections: every tracked object has gone unmatched once more.
        if len(rects) == 0:
            for objectID in list(self.disappeared.keys()):
                self._mark_disappeared(objectID)
            return self.objects

        # Centroid of every input box.
        inputCentroids = np.zeros((len(rects), 2), dtype="int")
        for (i, (startX, startY, endX, endY)) in enumerate(rects):
            cX = int((startX + endX) / 2.0)
            cY = int((startY + endY) / 2.0)
            inputCentroids[i] = (cX, cY)

        # Nothing tracked yet: every centroid becomes a new object.
        if len(self.objects) == 0:
            for centroid in inputCentroids:
                self.register(centroid)
            return self.objects

        objectIDs = list(self.objects.keys())
        objectCentroids = [e['centroid'] for e in self.objects.values()]

        # Pairwise distances: rows are tracked objects, columns are inputs.
        try:
            D = dist.cdist(np.array(objectCentroids), inputCentroids)
        except Exception as e:
            logger.info(f"objectCentroids --> {objectCentroids}")
            logger.info(f"inputCentroids --> {inputCentroids}")
            logger.exception(e)
            raise

        # Visit rows in order of their best (smallest) distance, each paired
        # with the column where that best distance occurs.
        rows = D.min(axis=1).argsort()
        cols = D.argmin(axis=1)[rows]

        usedRows = set()
        usedCols = set()
        for (row, col) in zip(rows, cols):
            # Each object and each input may be matched at most once.
            if row in usedRows or col in usedCols:
                continue
            objectID = objectIDs[row]
            self.objects[objectID]['centroid'] = inputCentroids[col]
            self.disappeared[objectID] = 0
            usedRows.add(row)
            usedCols.add(col)

        unusedRows = set(range(0, D.shape[0])).difference(usedRows)
        unusedCols = set(range(0, D.shape[1])).difference(usedCols)

        if D.shape[0] >= D.shape[1]:
            # At least as many objects as inputs: leftovers have disappeared.
            for row in unusedRows:
                self._mark_disappeared(objectIDs[row])
        else:
            # More inputs than objects: leftovers are new objects.
            for col in unusedCols:
                self.register(inputCentroids[col])

        return self.objects
from scripts.common.constants import JanusDeploymentConstants
from scripts.common.config import MONGO_DB_OBJ, APP_MONGO_COLLECTION
import cv2
from edge_engine.common.logsetup import logger
#from scripts.common.config import MONGO_DB_OBJ, APP_MONGO_COLLECTION
#from scripts.common.constants import JanusDeploymentConstants
class Utilities:
    """Helpers for Janus deployment documents and simple frame operations."""

    @classmethod
    def _deployment_collection(cls):
        """Return the Mongo collection holding the Janus deployment documents."""
        return MONGO_DB_OBJ[
            APP_MONGO_COLLECTION.get(JanusDeploymentConstants.JANUS_DEPLOYMENT_COLLECTION)
        ]

    @classmethod
    def get_extra_fields(
            cls,
            device_id,
    ):
        """
        Load a deployment's extra fields as a plain ``{key: value}`` dict.

        :param device_id: deployment id to look up
        :return: dict built from the ``key``/``value`` entries of the
            document's extra fields
        :raises ValueError: if the deployment document or its extra fields
            are missing (a missing document previously surfaced as
            ``AttributeError``)
        """
        _deployment = cls._deployment_collection().find_one(
            {JanusDeploymentConstants.DEPLOYMENT_ID: device_id})
        _extra_fields = None
        if _deployment is not None:
            _extra_fields = _deployment.get(JanusDeploymentConstants.EXTRA_FIELDS_KEY)
        if _extra_fields is None:
            raise ValueError("Janus deployment configuration is not found/corrupted")
        return {each_field['key']: each_field['value'] for each_field in _extra_fields}

    @classmethod
    def get_direction(
            cls,
            device_id,
    ):
        """
        Read the stored direction of a deployment.

        :param device_id: deployment id to look up
        :return: value of the document's direction field
        """
        logger.debug("Getting the direction from DB")
        return cls._deployment_collection().find_one(
            {JanusDeploymentConstants.DEPLOYMENT_ID: device_id}).get(
            JanusDeploymentConstants.DIRECTION_KEY)

    @classmethod
    def set_direction(
            cls,
            device_id: str,
            direction: bool,
    ):
        """
        Store the direction of a deployment.

        :param device_id: deployment id to update
        :param direction: value written to the direction field
        """
        logger.debug("Updating the direction in DB")
        updated_values = {"$set": {JanusDeploymentConstants.DIRECTION_KEY: direction}}
        cls._deployment_collection().update_one(
            {JanusDeploymentConstants.DEPLOYMENT_ID: device_id}, updated_values)

    @classmethod
    def draw_circles_on_frame(
            cls,
            frame,
            point,
            radius=3,
            color=(255, 255, 255),
            thickness=1,
    ):
        """
        Draw a circle on the frame.

        :param frame: frame to draw on
        :param point: centre coordinate of the circle
        :param radius: radius of the circle
        :param color: color of the circle
        :param thickness: thickness of the circle
        :return: frame
        """
        return cv2.circle(frame, tuple(point), radius, color, thickness)

    @classmethod
    def resize_to_64_64(
            cls,
            frame,
    ):
        """
        Resize the frame to 64 x 64.

        :param frame: frame
        :return: resized frame
        """
        return cv2.resize(frame, (64, 64))
def get_extra_fields(device_id):
    """
    Load a deployment's extra fields as a plain ``{key: value}`` dict.

    The extra fields are stored as a list of entries such as
    ``{"type": "number", "key": "x1", "value": 1000}`` or
    ``{"type": "dropdown", "key": "alignment", "value": "vertical"}``; only
    ``key`` and ``value`` are used.

    :param device_id: deployment id to look up
    :return: dict mapping each entry's ``key`` to its ``value``
    :raises ValueError: if the deployment document or its extra fields are
        missing (a missing document previously surfaced as ``AttributeError``)
    """
    _collection = MONGO_DB_OBJ[
        APP_MONGO_COLLECTION.get(JanusDeploymentConstants.JANUS_DEPLOYMENT_COLLECTION)
    ]
    _deployment = _collection.find_one({JanusDeploymentConstants.DEPLOYMENT_ID: device_id})
    _extra_fields = None
    if _deployment is not None:
        _extra_fields = _deployment.get(JanusDeploymentConstants.EXTRA_FIELDS_KEY)
    if _extra_fields is None:
        raise ValueError("Janus deployment configuration is not found/corrupted")
    return {each_field['key']: each_field['value'] for each_field in _extra_fields}
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 20 14:51:33 2017
@author: kyleguan
"""
import numpy as np
import cv2
class Box:
    """Plain record for a box: position (x, y), size (w, h), ``c`` and ``prob``."""

    def __init__(self):
        # Every field starts as the float 0.0.
        self.x = 0.0
        self.y = 0.0
        self.w = 0.0
        self.h = 0.0
        self.c = 0.0
        self.prob = 0.0
def overlap(x1, w1, x2, w2):
    """
    Signed overlap of two 1-D intervals given by centre and width.

    :return: length of the overlap; negative when the intervals are apart
    """
    half1 = w1 / 2.
    half2 = w2 / 2.
    left_edge = max(x1 - half1, x2 - half2)
    right_edge = min(x1 + half1, x2 + half2)
    return right_edge - left_edge
def box_intersection(a, b):
    """
    Area shared by two centre/size boxes.

    :param a: box with ``x``, ``y``, ``w``, ``h`` attributes
    :param b: box with ``x``, ``y``, ``w``, ``h`` attributes
    :return: intersection area, or 0 when the boxes do not overlap
    """
    shared_w = overlap(a.x, a.w, b.x, b.w)
    shared_h = overlap(a.y, a.h, b.y, b.h)
    if shared_w < 0 or shared_h < 0:
        return 0
    return shared_w * shared_h
def box_union(a, b):
    """
    Area covered by either of two centre/size boxes.

    :return: sum of both areas minus their intersection
    """
    shared = box_intersection(a, b)
    area_a = a.w * a.h
    area_b = b.w * b.h
    return area_a + area_b - shared
def box_iou(a, b):
    """
    Intersection-over-union of two centre/size boxes.

    :param a: box with ``x``, ``y``, ``w``, ``h`` attributes
    :param b: box with ``x``, ``y``, ``w``, ``h`` attributes
    :return: IoU; 0.0 when the union area is zero (both boxes have no area),
        which previously raised ``ZeroDivisionError``
    """
    union = box_union(a, b)
    if union == 0:
        return 0.0
    return box_intersection(a, b) / union
def box_iou2(a, b):
    '''
    Helper function to calculate the ratio between the intersection and the
    union of two boxes a and b.

    a[0], a[1], a[2], a[3] <-> left, up, right, bottom

    Returns 0.0 when the union area is zero (both boxes have no area)
    instead of dividing by zero.
    '''
    w_intsec = np.maximum(0, (np.minimum(a[2], b[2]) - np.maximum(a[0], b[0])))
    h_intsec = np.maximum(0, (np.minimum(a[3], b[3]) - np.maximum(a[1], b[1])))
    s_intsec = w_intsec * h_intsec
    s_a = (a[2] - a[0]) * (a[3] - a[1])
    s_b = (b[2] - b[0]) * (b[3] - b[1])
    s_union = s_a + s_b - s_intsec
    if s_union == 0:
        return 0.0
    return float(s_intsec) / s_union
def convert_to_pixel(box_yolo, img, crop_range):
    '''
    Convert a box given in scaled coordinates to pixel coordinates.

    box_yolo: object with x, y (centre) and w, h (size), scaled relative to
        the crop range
    img: image; only its shape is read, and it must unpack into three values
    crop_range: [[xmin, xmax], [ymin, ymax]], the part of the image the
        scaled coordinates refer to

    Returns a numpy array [left, top, width, height]; left and top are
    clamped to be non-negative.
    '''
    xmin, xmax = crop_range[0]
    ymin, ymax = crop_range[1]
    _img_h, _img_w, _ = img.shape

    x_span = xmax - xmin
    y_span = ymax - ymin

    # Top-left corner from the centre, clamped at zero.
    left = max(int((box_yolo.x - box_yolo.w / 2.) * x_span + xmin), 0)
    top = max(int((box_yolo.y - box_yolo.h / 2.) * y_span + ymin), 0)
    width = int(box_yolo.w * x_span)
    height = int(box_yolo.h * y_span)

    return np.array([left, top, width, height])
def convert_to_cv2bbox(bbox, img_dim=(1280, 720)):
    '''
    Convert a [left, top, width, height] box to (left, top, right, bottom),
    clipped to the image.

    bbox: [left, top, width, height]
    img_dim: image dimensions, img_dim[0] <-> x, img_dim[1] <-> y
    '''
    x, y, box_w, box_h = bbox[0], bbox[1], bbox[2], bbox[3]
    max_x, max_y = img_dim[0], img_dim[1]
    clipped_left = np.maximum(0, x)
    clipped_top = np.maximum(0, y)
    clipped_right = np.minimum(max_x, x + box_w)
    clipped_bottom = np.minimum(max_y, y + box_h)
    return (clipped_left, clipped_top, clipped_right, clipped_bottom)
def draw_box_label(id, img, bbox_cv2, box_color=(0, 255, 255), show_label=True):
    '''
    Draw a bounding box and, optionally, an id label above it.

    bbox_cv2 is read as [top, left, bottom, right]: index 1 and 3 are used
    as x coordinates, index 0 and 2 as y coordinates.
    Returns the image that was passed in.
    '''
    label_font = cv2.FONT_HERSHEY_SIMPLEX
    label_scale = 0.7
    label_color = (0, 0, 0)
    top, left, bottom, right = bbox_cv2[0], bbox_cv2[1], bbox_cv2[2], bbox_cv2[3]

    # Box outline
    cv2.rectangle(img, (left, top), (right, bottom), box_color, 4)

    if not show_label:
        return img

    # Filled strip above the box as label background, then the id text.
    cv2.rectangle(img, (left - 2, top - 45), (right + 2, top), box_color, -1, 1)
    id_text = 'id=' + str(id)
    cv2.putText(img, id_text, (left, top - 25), label_font, label_scale, label_color, 1, cv2.LINE_AA)
    return img
import os
import requests
GENERATED_TOKEN_KEY = "model_container_pregenerated_token"
LOGIN_TOKEN_KEY = "login-token"
def delete(**kwargs):
    """Call ``requests.delete``, adding the pregenerated login token as cookie and header when it is set."""
    token = os.environ.get(GENERATED_TOKEN_KEY)
    if token:
        auth = {LOGIN_TOKEN_KEY: token}
        kwargs.update(cookies=auth, headers=auth)
    return requests.delete(**kwargs)
def get(**kwargs):
    """Call ``requests.get``, adding the pregenerated login token as cookie and header when it is set."""
    token = os.environ.get(GENERATED_TOKEN_KEY)
    if token:
        auth = {LOGIN_TOKEN_KEY: token}
        kwargs.update(cookies=auth, headers=auth)
    return requests.get(**kwargs)
def head(**kwargs):
    """Call ``requests.head``, adding the pregenerated login token as cookie and header when it is set."""
    token = os.environ.get(GENERATED_TOKEN_KEY)
    if token:
        auth = {LOGIN_TOKEN_KEY: token}
        kwargs.update(cookies=auth, headers=auth)
    return requests.head(**kwargs)
def patch(**kwargs):
    """Call ``requests.patch``, adding the pregenerated login token as cookie and header when it is set."""
    token = os.environ.get(GENERATED_TOKEN_KEY)
    if token:
        auth = {LOGIN_TOKEN_KEY: token}
        kwargs.update(cookies=auth, headers=auth)
    return requests.patch(**kwargs)
def post(**kwargs):
    """Call ``requests.post``, adding the pregenerated login token as cookie and header when it is set."""
    token = os.environ.get(GENERATED_TOKEN_KEY)
    if token:
        auth = {LOGIN_TOKEN_KEY: token}
        kwargs.update(cookies=auth, headers=auth)
    return requests.post(**kwargs)
def put(**kwargs):
    """Call ``requests.put``, adding the pregenerated login token as cookie and header when it is set."""
    token = os.environ.get(GENERATED_TOKEN_KEY)
    if token:
        auth = {LOGIN_TOKEN_KEY: token}
        kwargs.update(cookies=auth, headers=auth)
    return requests.put(**kwargs)
from edge_engine.common.logsetup import logger
import cv2
def draw_circles_on_frame(frame, point, radius=3, color=(255, 255, 255), thickness=1):
    """
    Draw a circle on the frame and log the action at debug level.

    :param frame: frame to draw on
    :param point: centre coordinate of the circle (converted to a tuple)
    :param radius: radius of the circle
    :param color: color of the circle
    :param thickness: thickness of the circle
    :return: result of ``cv2.circle``
    """
    logger.debug("Drawing circle centroid on the frame")
    center = tuple(point)
    return cv2.circle(frame, center, radius, color, thickness)
def resize_to_64_64(frame):
    """
    Resize the frame to 64 x 64 and log the action at debug level.

    :param frame: frame to resize
    :return: resized frame
    """
    logger.debug("Resizing the frame to 64 x 64")
    target_size = (64, 64)
    return cv2.resize(frame, target_size)
\ No newline at end of file
import os
from datetime import datetime
# Security changes start
# from requests import post
from scripts.utils.ilens_request_handler import post
# Security changes stop
from uuid import uuid1
from urllib.parse import urljoin
from edge_engine.common.logsetup import logger
from scripts.common.config import MONGO_DB_OBJ, APP_MONGO_COLLECTION
class MongoLogger:
    """Write detection events to MongoDB and notify the count-update API."""

    def __init__(self):
        """Resolve the event and camera collections and cache the camera id -> name map."""
        self.attendance_event_collection = MONGO_DB_OBJ[APP_MONGO_COLLECTION.get('eventLogCollection')]
        self.camera_configuration = MONGO_DB_OBJ[APP_MONGO_COLLECTION.get('cameraConfigurationCollection')]
        self.camera_mapping_json = self.get_all_cameras()

    def get_all_cameras(self):
        """
        Build a mapping of camera id to camera name.

        :return: dict ``{cameraId: cameraName}`` for every camera whose
            ``decommissioned`` flag is False
        """
        camera_mapping_json = self.camera_configuration.find({'decommissioned': False}, {"_id": 0})
        return {each['cameraId']: each['cameraName'] for each in camera_mapping_json}

    @staticmethod
    def update_count_api(bag_type):
        """
        Report one counted bag to the count-update endpoint.

        The endpoint and asset identifiers are read from the environment
        variables ``count_update_endpoint``, ``asset_id`` and
        ``asset_hierarchy``. If any of them is missing, only a warning is
        logged.

        :param bag_type: bag type sent along with the increment
        """
        asset_id = os.environ.get('asset_id')
        asset_hierarchy = os.environ.get('asset_hierarchy')
        count_update_endpoint = os.environ.get('count_update_endpoint')
        # A placeholder is required for the extra argument; without it the
        # logging module reports a formatting error instead of the value.
        logger.debug("count_update_endpoint: %s", count_update_endpoint)
        if asset_id is not None and count_update_endpoint is not None and asset_hierarchy is not None:
            response = post(url=count_update_endpoint,
                            json=dict(asset_hierarchy=asset_hierarchy, count_increment=1, asset_id=asset_id,
                                      bag_type=bag_type), timeout=5)
            if response.status_code != 200:
                logger.warning(
                    "Value not updated in cards!. Invalid response from Update Count API: {}".format(response.content))
        else:
            logger.warning("Either asset_id, asset_hierarchy or count_update_endpoint is not set!."
                           " Not updating the cards API!")

    def insert_attendance_event_to_mongo(self, data):
        """
        Store one event document and then update the count API.

        Any exception raised while building or storing the event is logged
        and not propagated.

        :param data: dict with the keys read below (``deviceId``, ``frame``,
            ``bg_color``, ``font_color``, ``message``, ``alert_sound``,
            ``activity``, ``mrp_frmae``, ``mrp_roi``, ``bag_type``)
        """
        try:
            input_data = {
                "eventId": str(uuid1()).split('-')[0],
                "cameraId": data['deviceId'],
                "cameraName": self.camera_mapping_json.get(data['deviceId'], "Thermal Camera"),
                "timestamp": datetime.now(),
                "frame": data['frame'],
                "eventtype": "Intrusion Detection",
                "bg_color": data["bg_color"],
                "font_color": data["font_color"],
                "intrusion_message": data["message"],
                "alert_sound": data["alert_sound"],
                "logged_activity": data["activity"],
                "mrp_frmae": data["mrp_frmae"],
                "mrp_roi": data["mrp_roi"]}
            # Tag the event with the application name when one is configured.
            if os.environ.get('app') is not None:
                input_data['app'] = os.environ.get('app')
            logger.info("Pushing to Mongo..")
            self.attendance_event_collection.insert_one(input_data)
            self.update_count_api(data["bag_type"])
        except Exception as e:
            logger.exception(e)

    def get_camera_details(self, camera_id):
        """
        Fetch the configuration document of one camera.

        :param camera_id: camera id to look up
        :return: result of ``find_one`` for that camera id
        """
        camera_details_json = self.camera_configuration.find_one({"cameraId": camera_id})
        return camera_details_json
from datetime import datetime
from scripts.common.config import MONGO_DB_OBJ
class ModelCountTracker:
    """
    Buffer count-confidence records for one device and flush them in batches.

    Calling the instance appends one record; once 10 records are buffered
    they are written with ``insert_to_mongo`` and the buffer is cleared.
    """

    def __init__(
            self,
            device_id,
    ) -> None:
        self.device_id = device_id
        self.count_tracker = None
        self._reset_tracker()

    def _reset_tracker(self):
        """Start a fresh, empty buffer."""
        self.count_tracker = []

    def __call__(
            self,
            conf: float,
    ) -> None:
        """
        Record one confidence value with the current time and device id.

        :param conf: confidence stored under ``count_confidence``
        """
        record = {
            "time": datetime.now(),
            "deviceId": self.device_id,
            "count_confidence": conf,
        }
        self.count_tracker.append(record)
        if len(self.count_tracker) < 10:
            return
        self.insert_to_mongo(self.count_tracker)
        self._reset_tracker()

    @staticmethod
    def insert_to_mongo(
            payload: list,
            collection_name: str = "model_count_tracker"
    ) -> None:
        """Insert all records of ``payload`` into ``collection_name``."""
        target_collection = MONGO_DB_OBJ[collection_name]
        target_collection.insert_many(payload)
class ModelTracker:
    """
    Buffer model-confidence records for one device and flush them in batches.

    Calling the instance appends one record; once 500 records are buffered
    they are written with ``insert_to_mongo`` and the buffer is cleared.
    """

    def __init__(
            self,
            device_id,
    ) -> None:
        self.device_id = device_id
        self.model_tracker = None
        self._reset_tracker()

    def _reset_tracker(self):
        """Start a fresh, empty buffer."""
        self.model_tracker = []

    def __call__(
            self,
            conf: float,
    ) -> None:
        """
        Record one confidence value with the current time and device id.

        :param conf: confidence stored under ``model_confidence``
        """
        record = {
            "time": datetime.now(),
            "deviceId": self.device_id,
            "model_confidence": conf,
        }
        self.model_tracker.append(record)
        if len(self.model_tracker) < 500:
            return
        self.insert_to_mongo(self.model_tracker)
        self._reset_tracker()

    @staticmethod
    def insert_to_mongo(
            payload: list,
            collection_name: str = "model_confidence_tracker"
    ) -> None:
        """Insert all records of ``payload`` into ``collection_name``."""
        target_collection = MONGO_DB_OBJ[collection_name]
        target_collection.insert_many(payload)
import time
import requests
from edge_engine.common.logsetup import logger
class RelayHandler:
    """Client for the belt relay endpoint."""

    @staticmethod
    def update_relay_status(
            ep: str,
            payload: dict,
    ) -> None:
        """
        Send a status update to the relay, trying up to three times.

        An attempt succeeds when the reply has status code 200 and its JSON
        body has a truthy ``status``. Transport errors and replies that are
        not valid JSON count as failed attempts and are retried as well
        (previously the first such error aborted the loop). One second is
        waited after every failed attempt.

        :param ep: relay endpoint URL
        :param payload: JSON payload to post
        :raises RuntimeError: if none of the three attempts succeeds
        """
        logger.debug("Updating the relay status to : {}".format(payload))
        last_failure = None
        for _ in range(0, 3):
            try:
                response = requests.post(url=ep, json=payload, timeout=10)
                if response.status_code == 200 and response.json().get('status'):
                    return
                last_failure = response.content
            except (requests.RequestException, ValueError) as err:
                # Keep the error for the final log message and try again.
                last_failure = err
            time.sleep(1)
        logger.error("Unable to update the relay status. Error: {}".format(last_failure))
        raise RuntimeError("Unable to communicate to belt relay!")
\ No newline at end of file
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import numpy as np
from numpy import dot
from scipy.linalg import inv, block_diag
class Tracker():  # class for Kalman Filter based tracker
    """Kalman-filter tracker for one bounding box using a constant-velocity model.

    The state vector interleaves each box coordinate with its rate of change:
    [up, up_dot, left, left_dot, down, down_dot, right, right_dot]
    (or [up, up_dot, left, left_dot, height, height_dot, width, width_dot]).
    """

    def __init__(self):
        # --- track history -------------------------------------------------
        self.id = 0  # tracker's id
        self.box = []  # coordinates of the bounding box
        self.hits = 0  # number of detection matches
        self.no_losses = 0  # number of unmatched tracks (track loss)

        # --- Kalman filter parameters --------------------------------------
        self.x_state = []
        self.dt = 1.  # time interval
        step = self.dt

        # Process matrix (constant velocity): position advances by velocity * dt.
        self.F = np.array([
            [1, step, 0, 0, 0, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, step, 0, 0, 0, 0],
            [0, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 1, step, 0, 0],
            [0, 0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 0, 1, step],
            [0, 0, 0, 0, 0, 0, 0, 1],
        ])

        # Measurement matrix: only the four coordinates are observed.
        self.H = np.array([
            [1, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 1, 0],
        ])

        # State covariance
        self.L = 100.0
        self.P = np.diag(np.full(8, self.L))

        # Process covariance: one 2x2 block per coordinate.
        self.Q_comp_mat = np.array([
            [step ** 4 / 2., step ** 3 / 2.],
            [step ** 3 / 2., step ** 2],
        ])
        self.Q = block_diag(*([self.Q_comp_mat] * 4))

        # Measurement covariance
        self.R_ratio = 1.0 / 16.0
        self.R_diag_array = self.R_ratio * np.array([self.L] * 4)
        self.R = np.diag(self.R_diag_array)

    def update_R(self):
        """Rebuild the measurement covariance from the current ``R_ratio`` and ``L``."""
        self.R = np.diag(self.R_ratio * np.array([self.L] * 4))

    def kalman_filter(self, z):
        '''
        Run one predict + update cycle with measurement z and store the
        resulting state truncated to integers (pixel values).
        '''
        # Predict
        state = dot(self.F, self.x_state)
        self.P = dot(dot(self.F, self.P), self.F.T) + self.Q

        # Update
        innovation_cov = dot(dot(self.H, self.P), self.H.T) + self.R
        gain = dot(dot(self.P, self.H.T), inv(innovation_cov))  # Kalman gain
        residual = z - dot(self.H, state)
        state += dot(gain, residual)
        self.P = self.P - dot(dot(gain, self.H), self.P)
        self.x_state = state.astype(int)

    def predict_only(self):
        '''
        Run only the predict stage. This is used for unmatched detections and
        unmatched tracks.
        '''
        state = dot(self.F, self.x_state)
        self.P = dot(dot(self.F, self.P), self.F.T) + self.Q
        self.x_state = state.astype(int)
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    import glob
    import helpers

    # Create an instance and rebuild the measurement noise covariance
    # for the R_ratio under test.
    trk = Tracker()
    trk.R_ratio = 1.0 / 16
    trk.update_R()

    # Initial state and the box it describes
    x_init = np.array([390, 0, 1050, 0, 513, 0, 1278, 0])
    x_init_box = [x_init[idx] for idx in (0, 2, 4, 6)]

    # Measurement
    z = np.array([399, 1022, 504, 1256])

    trk.x_state = x_init.T
    trk.kalman_filter(z.T)

    # Updated state
    x_update = trk.x_state
    x_updated_box = [x_update[idx] for idx in (0, 2, 4, 6)]

    print('The initial state is: ', x_init)
    print('The measurement is: ', z)
    print('The update state is: ', x_update)

    # Visualize the Kalman filter process and the impact of the
    # measurement noise covariance matrix. Boxes are drawn cumulatively
    # onto the same image, one subplot per stage.
    images = [plt.imread(file) for file in glob.glob('./test_images/*.jpg')]
    img = images[3]
    plt.figure(figsize=(10, 14))
    panels = (
        ('Initial: ', x_init_box, {'box_color': (0, 255, 0)}),
        ('Measurement: ', z, {'box_color': (255, 0, 0)}),
        ('Updated: ', x_updated_box, {}),
    )
    for row, (caption, box, style) in enumerate(panels, start=1):
        helpers.draw_box_label(img, box, **style)
        ax = plt.subplot(3, 1, row)
        plt.imshow(img)
        plt.title(caption + str(box))
    plt.show()
import ctypes
import time
import cv2
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
# Shared library loaded into the process at import time via ctypes.
# NOTE(review): presumably this provides the custom TensorRT plugin layers the
# serialized engine needs — confirm. The absolute path is deployment-specific.
PLUGIN_LIBRARY = "/home/ilens/container_weights/libmyplugins.so"
ctypes.CDLL(PLUGIN_LIBRARY)
class YoloV5TRT(object):
    """
    description: A YOLOv5 class that wraps TensorRT ops, preprocess and postprocess ops.
    """

    # Number of floats per image in the flat output buffer.
    # NOTE(review): presumably 1 detection count + 1000 detections x 6 values — confirm
    # against the engine's output layout.
    OUTPUT_STRIDE = 6001

    def __init__(self, engine_file_path, conf_thresh, iou_thresh):
        """
        description: Create a CUDA context on device 0, deserialize the engine and
                     allocate page-locked host / device buffers for every binding.
        param:
            engine_file_path: path of the serialized TensorRT engine
            conf_thresh: confidence threshold used during post-processing
            iou_thresh: IoU threshold used by non-maximum suppression
        """
        self.CONF_THRESH = conf_thresh
        self.IOU_THRESHOLD = iou_thresh
        self.ctx = cuda.Device(0).make_context()
        stream = cuda.Stream()
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        runtime = trt.Runtime(TRT_LOGGER)
        with open(engine_file_path, "rb") as f:
            engine = runtime.deserialize_cuda_engine(f.read())
        context = engine.create_execution_context()
        host_inputs = []
        cuda_inputs = []
        host_outputs = []
        cuda_outputs = []
        bindings = []
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            host_mem = cuda.pagelocked_empty(size, dtype)
            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
            bindings.append(int(cuda_mem))
            if engine.binding_is_input(binding):
                # The last two dimensions of the input binding are taken as (height, width).
                self.input_w = engine.get_binding_shape(binding)[-1]
                self.input_h = engine.get_binding_shape(binding)[-2]
                host_inputs.append(host_mem)
                cuda_inputs.append(cuda_mem)
            else:
                host_outputs.append(host_mem)
                cuda_outputs.append(cuda_mem)
        self.stream = stream
        self.context = context
        self.engine = engine
        self.host_inputs = host_inputs
        self.cuda_inputs = cuda_inputs
        self.host_outputs = host_outputs
        self.cuda_outputs = cuda_outputs
        self.bindings = bindings
        self.batch_size = engine.max_batch_size

    def infer(self, frame):
        """
        description: Run the engine on one BGR frame and post-process the output.
        param:
            frame: BGR image as a numpy array (H, W, C)
        return:
            result_boxes, result_scores, result_classid as produced by post_process.
            When batch_size > 1 only the last batch slot's results are returned
            (each iteration overwrites the previous one).
        """
        self.ctx.push()
        try:
            stream = self.stream
            host_inputs = self.host_inputs
            cuda_inputs = self.cuda_inputs
            host_outputs = self.host_outputs
            cuda_outputs = self.cuda_outputs
            input_image, _, _, _ = self.preprocess_image(frame)
            np.copyto(host_inputs[0], input_image.ravel())
            cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
            self.context.execute_async(batch_size=self.batch_size, bindings=self.bindings,
                                       stream_handle=stream.handle)
            cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
            stream.synchronize()
        finally:
            # Always undo the push, otherwise a failure here would leave this
            # context on the thread's context stack.
            self.ctx.pop()
        output = host_outputs[0]
        result_boxes, result_scores, result_classid = [], [], []
        stride = self.OUTPUT_STRIDE
        for i in range(self.batch_size):
            result_boxes, result_scores, result_classid = self.post_process(
                output[i * stride: (i + 1) * stride], frame.shape[0], frame.shape[1])
        return result_boxes, result_scores, result_classid

    def destroy(self):
        """description: Pop the CUDA context created in __init__."""
        self.ctx.pop()

    def preprocess_image(self, raw_bgr_image):
        """
        description: Convert BGR image to RGB,
                     resize and pad it to target size, normalize to [0,1],
                     transform to NCHW format.
        param:
            raw_bgr_image: BGR image as a numpy array (H, W, C)
        return:
            image:  the processed image
            image_raw: the original image
            h: original height
            w: original width
        """
        image_raw = raw_bgr_image
        h, w, c = image_raw.shape
        image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
        # Scale by the smaller ratio so the whole image fits, then pad the other axis.
        r_w = self.input_w / w
        r_h = self.input_h / h
        if r_h > r_w:
            tw = self.input_w
            th = int(r_w * h)
            tx1 = tx2 = 0
            ty1 = int((self.input_h - th) / 2)
            ty2 = self.input_h - th - ty1
        else:
            tw = int(r_h * w)
            th = self.input_h
            tx1 = int((self.input_w - tw) / 2)
            tx2 = self.input_w - tw - tx1
            ty1 = ty2 = 0
        image = cv2.resize(image, (tw, th))
        image = cv2.copyMakeBorder(
            image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, (128, 128, 128)
        )
        image = image.astype(np.float32)
        image /= 255.0
        image = np.transpose(image, [2, 0, 1])
        image = np.expand_dims(image, axis=0)
        image = np.ascontiguousarray(image)
        return image, image_raw, h, w

    def xywh2xyxy(self, origin_h, origin_w, x):
        """
        description:    Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left,
                        xy2=bottom-right, removing the letterbox padding and rescaling to the
                        original image size.
        param:
            origin_h:   height of original image
            origin_w:   width of original image
            x:          A boxes numpy, each row is a box [center_x, center_y, w, h]
        return:
            y:          A boxes numpy, each row is a box [x1, y1, x2, y2]
        """
        y = np.zeros_like(x)
        r_w = self.input_w / origin_w
        r_h = self.input_h / origin_h
        if r_h > r_w:
            # Image was padded vertically: subtract the top padding from y coordinates.
            y[:, 0] = x[:, 0] - x[:, 2] / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2 - (self.input_h - r_w * origin_h) / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2 - (self.input_h - r_w * origin_h) / 2
            y /= r_w
        else:
            # Image was padded horizontally: subtract the left padding from x coordinates.
            y[:, 0] = x[:, 0] - x[:, 2] / 2 - (self.input_w - r_h * origin_w) / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2 - (self.input_w - r_h * origin_w) / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2
            y /= r_h
        return y

    def post_process(self, output, origin_h, origin_w):
        """
        description: postprocess the prediction
        param:
            output:     A numpy likes [num_boxes,cx,cy,w,h,conf,cls_id, cx,cy,w,h,conf,cls_id, ...]
            origin_h:   height of original image
            origin_w:   width of original image
        return:
            result_boxes: final boxes, a boxes numpy, each row is a box [x1, y1, x2, y2]
            result_scores: final scores, a numpy, each element is the score corresponding to box
            result_classid: final classid, a numpy, each element is the classid corresponding to box
            (all three are empty arrays when nothing survives filtering)
        """
        num = int(output[0])
        pred = np.reshape(output[1:], (-1, 6))[:num, :]
        boxes = self.non_max_suppression(pred, origin_h, origin_w, conf_thres=self.CONF_THRESH,
                                         nms_thres=self.IOU_THRESHOLD)
        result_boxes = boxes[:, :4].astype(int) if len(boxes) else np.array([])
        result_scores = boxes[:, 4] if len(boxes) else np.array([])
        result_classid = boxes[:, 5].astype(int) if len(boxes) else np.array([])
        return result_boxes, result_scores, result_classid

    def bbox_iou(self, box1, box2, x1y1x2y2=True):
        """
        description: compute the IoU of two sets of bounding boxes (pixel-inclusive, hence the +1)
        param:
            box1: box array, rows are (x1, y1, x2, y2) or (x, y, w, h)
            box2: box array, rows are (x1, y1, x2, y2) or (x, y, w, h)
            x1y1x2y2: select the coordinate format
        return:
            iou: computed iou
        """
        if not x1y1x2y2:
            b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
            b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
            b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
            b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
        else:
            b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
            b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
        inter_rect_x1 = np.maximum(b1_x1, b2_x1)
        inter_rect_y1 = np.maximum(b1_y1, b2_y1)
        inter_rect_x2 = np.minimum(b1_x2, b2_x2)
        inter_rect_y2 = np.minimum(b1_y2, b2_y2)
        inter_area = np.clip(inter_rect_x2 - inter_rect_x1 + 1, 0, None) * \
            np.clip(inter_rect_y2 - inter_rect_y1 + 1, 0, None)
        b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
        b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
        # Small epsilon avoids division by zero.
        iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
        return iou

    def non_max_suppression(self, prediction, origin_h, origin_w, conf_thres=0.5, nms_thres=0.4):
        """
        description: Removes detections with lower object confidence score than 'conf_thres' and performs
        Non-Maximum Suppression to further filter detections.
        param:
            prediction: detections, rows are (cx, cy, w, h, conf, cls_id) in network input coordinates
            origin_h: original image height
            origin_w: original image width
            conf_thres: a confidence threshold to filter detections
            nms_thres: a iou threshold to filter detections
        return:
            boxes: output after nms, rows are (x1, y1, x2, y2, conf, cls_id) in original image
                   coordinates; an empty array when nothing is kept
        """
        # Boolean indexing copies, so the caller's array is not modified below.
        boxes = prediction[prediction[:, 4] >= conf_thres]
        boxes[:, :4] = self.xywh2xyxy(origin_h, origin_w, boxes[:, :4])
        boxes[:, 0] = np.clip(boxes[:, 0], 0, origin_w - 1)
        boxes[:, 2] = np.clip(boxes[:, 2], 0, origin_w - 1)
        boxes[:, 1] = np.clip(boxes[:, 1], 0, origin_h - 1)
        boxes[:, 3] = np.clip(boxes[:, 3], 0, origin_h - 1)
        confs = boxes[:, 4]
        boxes = boxes[np.argsort(-confs)]
        keep_boxes = []
        while boxes.shape[0]:
            # The top box always matches itself, so every iteration removes at least one row.
            large_overlap = self.bbox_iou(np.expand_dims(boxes[0, :4], 0), boxes[:, :4]) > nms_thres
            label_match = boxes[0, -1] == boxes[:, -1]
            invalid = large_overlap & label_match
            keep_boxes.append(boxes[0])
            boxes = boxes[~invalid]
        boxes = np.stack(keep_boxes, 0) if len(keep_boxes) else np.array([])
        return boxes
class inferThread():
    """Read frames from ``test2.mp4`` and print the detections for each one."""

    def __init__(self, yolov5_wrapper):
        # Object exposing infer(frame) -> (boxes, scores, class_ids).
        self.yolov5_wrapper = yolov5_wrapper

    def run(self):
        """Run inference on every frame of the video until reading fails."""
        cap = cv2.VideoCapture("test2.mp4")
        try:
            ret, frame = cap.read()
            while ret:
                result_boxes, result_scores, result_classid = self.yolov5_wrapper.infer(frame)
                out = [{"points": list(points), "conf": conf, "class": class_id} for points, conf, class_id in
                       zip(result_boxes, result_scores, result_classid)]
                print(out)
                ret, frame = cap.read()
        finally:
            # Release the capture even when inference raises.
            cap.release()
if __name__ == "__main__":
    engine_file_path = "build/yolov5.engine"
    categories = ["cement_bag"]
    # YoloV5TRT requires both thresholds; the values mirror the defaults of
    # YoloV5TRT.non_max_suppression (conf_thres=0.5, nms_thres=0.4).
    yolov5_wrapper = YoloV5TRT(engine_file_path, conf_thresh=0.5, iou_thresh=0.4)
    try:
        inf = inferThread(yolov5_wrapper)
        inf.run()
    finally:
        yolov5_wrapper.destroy()
test.jpg

320 KB

# Ratio looked up per class name; every class currently maps to the same value.
black_white_ratio_dict = dict.fromkeys(
    (
        'ambuja_plus',
        'acc_gold',
        'acc_suraksha_power_plus',
        'ambuja_buildcem',
        'acc_suraksha_power',
        'acc_nfr',
        'acc_concrete_plus',
    ),
    1.2,
)
# Build the lookup key through a format template, then print the ratio found.
lookup_key = "{class_name}".format(class_name="ambuja_plus")
lack_white_ratio = black_white_ratio_dict[lookup_key]
print(lack_white_ratio)
\ No newline at end of file
from os import environ
# Set the "config" environment variable (a JSON document) before the project
# imports that follow this statement.
# NOTE(review): presumably edge_engine reads this variable at import time, which
# would be why it is assigned first — confirm.
# NOTE(review): the MongoDB URI embeds a username and password in source control;
# consider loading it from a secret store or an untracked local file.
environ["config"] = '{"MONGO_URI": "mongodb://admin:iLens!8989@192.168.0.220:21017", "MONGO_DATABASE": "ilens_wps", ' \
'"MONGO_DB": "ilens_wps","MONGO_COLLECTION": "janusDeployment", "MONGO_KEY": "deploymentId", ' \
'"MONGO_VALUE": "1b180a0e", "MONGO_SERVICE_COLL": "serviceConfiguration", "MONGO_COLL": ' \
'"serviceConfiguration" } '
import unittest
from edge_engine.edge_processor import Pubs
from edge_engine.common.config import EDGE_CONFIG
from scripts.cement_counter import CementBagCounter
class TestCementBagCounter(unittest.TestCase):
    """Unit tests for CementBagCounter."""

    def test__pre_process(self):
        """_pre_process("5") is expected to return '5'."""
        counter = CementBagCounter(
            config=EDGE_CONFIG,
            model_config=EDGE_CONFIG["modelConfig"],
            pubs=Pubs(),
            device_id=EDGE_CONFIG['deviceId'],
        )
        result = counter._pre_process("5")
        self.assertEqual(result, '5')
import torch
import numpy as np
from numpy import random
from yolov5processor.models.experimental import attempt_load
from yolov5processor.utils.datasets import letterbox
from yolov5processor.utils.general import (check_img_size, non_max_suppression, scale_coords)
from yolov5processor.utils.torch_utils import select_device
class ExecuteInference:
    """Load a YOLOv5 weight file once and run detection on individual images."""

    def __init__(self, weight, confidence=0.4, img_size=640, agnostic_nms=False, gpu=False, iou=0.5):
        """
        :param weight: weight file handed to ``attempt_load``
        :param confidence: confidence threshold passed to non-max suppression
        :param img_size: inference size passed to ``letterbox`` / ``check_img_size``
        :param agnostic_nms: forwarded as ``agnostic`` to non-max suppression
        :param gpu: use the current CUDA device when True, otherwise the CPU
        :param iou: IoU threshold passed to non-max suppression
        """
        self.weight = weight
        self.confidence = confidence
        self.gpu = gpu
        self.iou = iou
        self.agnostic_nms = agnostic_nms
        self.img_size = img_size
        self.device, self.half = self.inference_device()
        self.classes, self.model, self.names, self.colors = self.load_model()
        print("Loaded Models...")

    def inference_device(self):
        """Select the torch device; half precision is enabled for any non-CPU device.

        :return: ``(device, half)``
        """
        if self.gpu:
            device = select_device(str(torch.cuda.current_device()))
            print("Using GPU Resource(s): {}".format(str(torch.cuda.current_device())))
        else:
            device = select_device('cpu')
            print("Using CPU Resources")
        half = device.type != 'cpu'
        return device, half

    def load_model(self):
        """Load the model, optionally convert it to half precision and warm it up.

        :return: ``(class_map, model, names, colors)`` where ``class_map`` maps
            class index to label and ``colors`` holds one random colour per class
        """
        model = attempt_load(self.weight, map_location=self.device)
        imgsz = check_img_size(self.img_size, s=model.stride.max())
        if self.half:
            model.half()
        names = model.module.names if hasattr(model, 'module') else model.names
        print("Yolo v5 Model Classes: {}".format(names))
        colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]
        if self.device.type != 'cpu':
            # One dummy forward pass on non-CPU devices; no gradients are needed.
            img = torch.zeros((1, 3, imgsz, imgsz), device=self.device)
            with torch.no_grad():
                model(img.half() if self.half else img)
        class_map = dict(enumerate(names))
        return class_map, model, names, colors

    def predict(self, image):
        """Detect objects in one image.

        :param image: image array; the channel axis is reversed before inference
            (assumes BGR input as produced by OpenCV — TODO confirm)
        :return: list of dicts with ``points`` (int x1, y1, x2, y2 in the
            coordinates of ``image``), ``conf`` (rounded to 4 decimals) and
            ``class`` (label from the model's class names)
        """
        img = letterbox(image, new_shape=self.img_size)[0]
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()
        img /= 255.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        # Pure inference: disable autograd so no graph is built or kept alive.
        with torch.no_grad():
            pred = self.model(img, augment=False)[0]
            pred = non_max_suppression(pred, self.confidence, self.iou, classes=None, agnostic=self.agnostic_nms)
        _output = list()
        for det in pred:
            if det is not None and len(det):
                # Map boxes from the letterboxed size back to the original image.
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], image.shape).round()
                for *xyxy, conf, cls in reversed(det):
                    _output.append({"points": [int(each) for each in xyxy],
                                    "conf": round(float(conf), 4),
                                    "class": self.classes[int(cls)]})
        return _output
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Common modules
"""
import logging
import math
import warnings
from copy import copy
from pathlib import Path
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp
from yolov5processor.utils.datasets import exif_transpose, letterbox
from yolov5processor.utils.general import colorstr, increment_path, make_divisible, non_max_suppression, save_one_box, \
scale_coords, xyxy2xywh
from yolov5processor.utils.plots import Annotator, colors
from yolov5processor.utils.torch_utils import time_sync
LOGGER = logging.getLogger(__name__)
def autopad(k, p=None):  # kernel, padding
    """Return ``p`` when given, otherwise half the kernel size ('same' padding).

    ``k`` may be an int or an iterable of ints; an iterable yields a list.
    """
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [side // 2 for side in k]
class Conv(nn.Module):
    """Standard convolution: Conv2d (no bias) -> BatchNorm2d -> activation."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        padding = autopad(k, p)
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        # act=True selects SiLU, a module instance is used as-is, anything else disables activation.
        if act is True:
            self.act = nn.SiLU()
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Convolution, batch norm, then activation."""
        normalized = self.bn(self.conv(x))
        return self.act(normalized)

    def forward_fuse(self, x):
        """Convolution then activation, skipping the batch norm layer."""
        convolved = self.conv(x)
        return self.act(convolved)
class DWConv(Conv):
    """Depth-wise convolution: a Conv whose group count is gcd(c1, c2)."""

    def __init__(self, c1, c2, k=1, s=1, act=True):  # ch_in, ch_out, kernel, stride, activation
        groups = math.gcd(c1, c2)
        super().__init__(c1, c2, k, s, g=groups, act=act)
class TransformerLayer(nn.Module):
    """Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance).

    Multi-head self-attention and a two-layer linear block, each wrapped in a
    residual connection.
    """

    def __init__(self, c, num_heads):
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        """Apply attention + residual, then the linear block + residual."""
        query, key, value = self.q(x), self.k(x), self.v(x)
        attended = self.ma(query, key, value)[0]  # [0] is the attention output
        x = attended + x
        projected = self.fc2(self.fc1(x))
        return projected + x
class TransformerBlock(nn.Module):
    """Vision Transformer https://arxiv.org/abs/2010.11929

    Optionally projects c1 -> c2 channels with a Conv, flattens the spatial
    dimensions into a sequence, adds a learned linear embedding and runs
    ``num_layers`` TransformerLayer modules.
    """

    def __init__(self, c1, c2, num_heads, num_layers):
        super().__init__()
        # A projection is only needed when the channel counts differ.
        self.conv = None
        if c1 != c2:
            self.conv = Conv(c1, c2)
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        layers = (TransformerLayer(c2, num_heads) for _ in range(num_layers))
        self.tr = nn.Sequential(*layers)
        self.c2 = c2

    def forward(self, x):
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        # Flatten the two spatial dims and move them to the front as the sequence axis.
        seq = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3)
        encoded = self.tr(seq + self.linear(seq))
        # Undo the rearrangement and restore the (b, c2, w, h) layout.
        return encoded.unsqueeze(3).transpose(0, 3).reshape(b, self.c2, w, h)
class Bottleneck(nn.Module):
    """Standard bottleneck: 1x1 Conv -> 3x3 Conv, with an optional residual add."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
        # The residual is only possible when input and output channels match.
        self.add = shortcut and c1 == c2

    def forward(self, x):
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out
class BottleneckCSP(nn.Module):
    """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks"""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = nn.Conv2d(c1, hidden, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(hidden, hidden, 1, 1, bias=False)
        self.cv4 = Conv(2 * hidden, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * hidden)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        blocks = (Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n))
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        # Branch 1 runs the bottleneck stack, branch 2 is a plain 1x1 convolution.
        deep = self.cv3(self.m(self.cv1(x)))
        skip = self.cv2(x)
        merged = torch.cat((deep, skip), dim=1)
        return self.cv4(self.act(self.bn(merged)))
class C3(nn.Module):
    """CSP Bottleneck with 3 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)  # act=FReLU(c2)
        blocks = (Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n))
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        # Concatenate the bottleneck branch with the shortcut branch along channels.
        deep = self.m(self.cv1(x))
        skip = self.cv2(x)
        return self.cv3(torch.cat((deep, skip), dim=1))
class C3TR(C3):
    """C3 module whose inner stack is replaced by a TransformerBlock()."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        # 4 attention heads, n transformer layers.
        self.m = TransformerBlock(hidden, hidden, 4, n)
class C3SPP(C3):
    """C3 module whose inner stack is replaced by an SPP() layer."""

    def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        self.m = SPP(hidden, hidden, k)
class C3Ghost(C3):
    """C3 module whose inner stack is built from GhostBottleneck() blocks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)  # hidden channels
        ghosts = (GhostBottleneck(hidden, hidden) for _ in range(n))
        self.m = nn.Sequential(*ghosts)
class SPP(nn.Module):
    """Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729"""

    def __init__(self, c1, c2, k=(5, 9, 13)):
        super().__init__()
        hidden = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        # One copy of the input plus one pooled copy per kernel size.
        self.cv2 = Conv(hidden * (len(k) + 1), c2, 1, 1)
        pools = [nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k]
        self.m = nn.ModuleList(pools)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            pooled = [pool(x) for pool in self.m]
            return self.cv2(torch.cat([x] + pooled, 1))
class SPPF(nn.Module):
    """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher.

    The same max-pool is applied three times in sequence and all stages are concatenated.
    """

    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
        super().__init__()
        hidden = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            stages = [x]
            for _ in range(3):
                # Each stage pools the previous stage's output.
                stages.append(self.m(stages[-1]))
            return self.cv2(torch.cat(stages, 1))
class Focus(nn.Module):
    """Focus wh information into c-space."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        # Take the four interleaved sub-grids (every other element along the last two dims)
        # and stack them along the channel axis.
        patches = [
            x[..., ::2, ::2],
            x[..., 1::2, ::2],
            x[..., ::2, 1::2],
            x[..., 1::2, 1::2],
        ]
        return self.conv(torch.cat(patches, 1))
class GhostConv(nn.Module):
    """Ghost Convolution https://github.com/huawei-noah/ghostnet"""

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super().__init__()
        hidden = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, hidden, k, s, None, g, act)
        # 5x5 convolution with one group per channel applied to the output of cv1.
        self.cv2 = Conv(hidden, hidden, 5, 1, None, hidden, act)

    def forward(self, x):
        primary = self.cv1(x)
        ghost = self.cv2(primary)
        return torch.cat([primary, ghost], 1)
class GhostBottleneck(nn.Module):
    """Ghost Bottleneck https://github.com/huawei-noah/ghostnet"""

    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        super().__init__()
        hidden = c2 // 2
        downsample = s == 2

        # Main branch: pointwise ghost conv, optional depth-wise conv, linear ghost conv.
        pointwise = GhostConv(c1, hidden, 1, 1)  # pw
        if downsample:
            depthwise = DWConv(hidden, hidden, k, s, act=False)  # dw
        else:
            depthwise = nn.Identity()
        linear = GhostConv(hidden, c2, 1, 1, act=False)  # pw-linear
        self.conv = nn.Sequential(pointwise, depthwise, linear)

        # Shortcut branch: identity unless the stride is 2.
        if downsample:
            self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                          Conv(c1, c2, 1, 1, act=False))
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)
class Contract(nn.Module):
    """Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)."""

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        batch, chans, height, width = x.size()  # height and width must be divisible by gain
        g = self.gain
        out_h, out_w = height // g, width // g
        tiles = x.view(batch, chans, out_h, g, out_w, g)  # x(1,64,40,2,40,2)
        tiles = tiles.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return tiles.view(batch, chans * g * g, out_h, out_w)  # x(1,256,40,40)
class Expand(nn.Module):
    """Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)."""

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        batch, chans, height, width = x.size()  # chans must be divisible by gain ** 2
        g = self.gain
        out_c = chans // g ** 2
        tiles = x.view(batch, g, g, out_c, height, width)  # x(1,2,2,16,80,80)
        tiles = tiles.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return tiles.view(batch, out_c, height * g, width * g)  # x(1,16,160,160)
class Concat(nn.Module):
    """Concatenate a list of tensors along a fixed dimension."""

    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension

    def forward(self, x):
        """Join the tensors in ``x`` along the configured dimension."""
        axis = self.d
        return torch.cat(x, axis)
class AutoShape(nn.Module):
    # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    # NOTE(review): forward() reads self.stride and self.names, which are not assigned anywhere in
    # this class — presumably attached by whoever wraps the model; confirm.
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class
    multi_label = False  # NMS multiple labels per box
    max_det = 1000  # maximum number of detections per image

    def __init__(self, model):
        """Wrap ``model`` and put it into eval mode."""
        super().__init__()
        self.model = model.eval()

    def autoshape(self):
        """Log that the wrapper is already in place and return ``self`` unchanged."""
        LOGGER.info('AutoShape already enabled, skipping... ')  # model already converted to model.autoshape()
        return self

    def _apply(self, fn):
        """Apply ``fn`` as nn.Module does, then also to the stride/grid tensors of the last model layer."""
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
        m = self.model.model[-1]  # Detect()
        m.stride = fn(m.stride)
        m.grid = list(map(fn, m.grid))
        if isinstance(m.anchor_grid, list):
            m.anchor_grid = list(map(fn, m.anchor_grid))
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        """Run pre-processing, inference and NMS on one image or a list of images.

        A torch.Tensor input bypasses pre- and post-processing: it is moved to the
        model's device/dtype and the raw model output is returned. Any other input
        is converted to numpy images, letterboxed to a common shape, batched, run
        through the model and NMS, and returned as a ``Detections`` object with
        boxes scaled back to each original image size.

        Note: when ``imgs`` is a list it is updated in place with the converted arrays.
        """
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   file:       imgs = 'data/images/zidane.jpg'  # str or PosixPath
        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
        t = [time_sync()]
        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
            with amp.autocast(enabled=p.device.type != 'cpu'):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference
        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # filename
            if isinstance(im, (str, Path)):  # filename or uri
                im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
                im = np.asarray(exif_transpose(im))
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)
            # A first dimension smaller than 5 is taken to be the channel axis.
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        # Largest scaled height/width across the batch, rounded by make_divisible with the max stride.
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_sync())
        with amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_sync())
            # Post-process
            y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes,
                                    multi_label=self.multi_label, max_det=self.max_det)  # NMS
            for i in range(n):
                # Return value is ignored here — presumably scale_coords rescales the boxes in place; confirm.
                scale_coords(shape1, y[i][:, :4], shape0[i])
            t.append(time_sync())
            return Detections(imgs, y, files, t, self.names, x.shape)
class Detections:
# YOLOv5 detections class for inference results
def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
super().__init__()
d = pred[0].device # device
gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations
self.imgs = imgs # list of images as numpy arrays
self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
self.names = names # class names
self.files = files # image filenames
self.xyxy = pred # xyxy pixels
self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
self.n = len(self.pred) # number of images (batch size)
self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms)
self.s = shape # inference BCHW shape
def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
crops = []
for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string
if pred.shape[0]:
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum() # detections per class
s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
if show or save or render or crop:
annotator = Annotator(im, example=str(self.names))
for *box, conf, cls in reversed(pred): # xyxy, confidence, class
label = f'{self.names[int(cls)]} {conf:.2f}'
if crop:
file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
crops.append({'box': box, 'conf': conf, 'cls': cls, 'label': label,
'im': save_one_box(box, im, file=file, save=save)})
else: # all others
annotator.box_label(box, label, color=colors(cls))
im = annotator.im
else:
s += '(no detections)'
im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np
if pprint:
LOGGER.info(s.rstrip(', '))
if show:
im.show(self.files[i]) # show
if save:
f = self.files[i]
im.save(save_dir / f) # save
if i == self.n - 1:
LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
if render:
self.imgs[i] = np.asarray(im)
if crop:
if save:
LOGGER.info(f'Saved results to {save_dir}\n')
return crops
def print(self):
self.display(pprint=True) # print results
LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' %
self.t)
def show(self):
self.display(show=True) # show results
def save(self, save_dir='runs/detect/exp'):
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir
self.display(save=True, save_dir=save_dir) # save results
def crop(self, save=True, save_dir='runs/detect/exp'):
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None
return self.display(crop=True, save=save, save_dir=save_dir) # crop results
def render(self):
self.display(render=True) # render results
return self.imgs
def pandas(self):
# return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
new = copy(self) # return copy
ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
return new
def tolist(self):
    """Split this object into a list of Detections, one per index in range(self.n).

    Example: ``for result in results.tolist():``
    """
    # NOTE(review): the arguments are positional; confirm that (self.names, self.s)
    # land on the intended parameters of Detections.__init__.
    singles = [Detections([self.imgs[i]], [self.pred[i]], self.names, self.s) for i in range(self.n)]
    unwrap = ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']
    for single in singles:
        for attr in unwrap:
            first = getattr(single, attr)[0]  # each attribute is a one-element list here
            setattr(single, attr, first)  # pop out of list
    return singles
def __len__(self):
    """Return the stored count ``self.n``."""
    count = self.n
    return count
class Classify(nn.Module):
    """Classification head, i.e. x(b,c1,20,20) to x(b,c2)."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
        """Build the pool -> conv -> flatten pipeline.

        Args:
            c1: input channels.
            c2: output channels.
            k: kernel size.
            s: stride.
            p: padding; forwarded to autopad() together with ``k``.
            g: convolution groups.
        """
        super().__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, kernel_size=k, stride=s, padding=autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        """Pool the input (or every tensor of a list input), concatenate on dim 1, convolve and flatten."""
        inputs = x if isinstance(x, list) else [x]
        pooled = [self.aap(t) for t in inputs]
        merged = torch.cat(pooled, 1)  # cat if list
        return self.flat(self.conv(merged))  # flatten to x(b,c2)
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Experimental modules
"""
import numpy as np
import torch
import torch.nn as nn
from yolov5processor.models.common import Conv
from yolov5processor.utils.downloads import attempt_download
class CrossConv(nn.Module):
    """Cross Convolution Downsample: a (1, k) convolution followed by a (k, 1) convolution."""

    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        """Create the two convolutions.

        Args:
            c1: input channels.
            c2: output channels.
            k: kernel length used along one axis per convolution.
            s: stride used along that same axis.
            g: groups for the second convolution.
            e: expansion factor for the hidden channel count.
            shortcut: request a residual connection (only honoured when c1 == c2).
        """
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, (1, k), (1, s))
        self.cv2 = Conv(hidden, c2, (k, 1), (s, 1), g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Apply cv1 then cv2, adding the input back when the shortcut is enabled."""
        if self.add:
            return x + self.cv2(self.cv1(x))
        return self.cv2(self.cv1(x))
class Sum(nn.Module):
    """Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070"""

    def __init__(self, n, weight=False):
        """Set up the summation.

        Args:
            n: number of inputs.
            weight: when true, create a learnable weight for every input after the first.
        """
        super().__init__()
        self.weight = weight  # apply weights boolean
        self.iter = range(n - 1)  # indices of the inputs that follow the first one
        if weight:
            # initial values are -0.5, -1.0, ...; forward() maps them through sigmoid * 2
            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)

    def forward(self, x):
        """Return x[0] plus the remaining inputs, each scaled when weighting is on."""
        total = x[0]  # the first input is never weighted
        if not self.weight:
            for idx in self.iter:
                total = total + x[idx + 1]
            return total
        scale = torch.sigmoid(self.w) * 2
        for idx in self.iter:
            total = total + x[idx + 1] * scale[idx]
        return total
class MixConv2d(nn.Module):
    """Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595"""

    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        """Create one convolution per kernel size and split c2 output channels among them.

        Args:
            c1: input channels.
            c2: total output channels across all branches.
            k: kernel sizes, one branch per entry.
            s: stride shared by every branch.
            equal_ch: True splits channels evenly per branch; False solves for
                channel counts that give equal weight.numel() per branch.
        """
        super().__init__()
        groups = len(k)
        if equal_ch:  # equal c_ per group
            idx = torch.linspace(0, groups - 1E-6, c2).floor()  # branch index for each of the c2 channels
            c_ = [(idx == g).sum() for g in range(groups)]  # intermediate channels
        else:  # equal weight.numel() per group
            rhs = [c2] + [0] * groups
            coef = np.eye(groups + 1, groups, k=-1)
            coef -= np.roll(coef, 1, axis=1)
            coef *= np.array(k) ** 2
            coef[0] = 1
            c_ = np.linalg.lstsq(coef, rhs, rcond=None)[0].round()  # solve for equal weight indices, ax = b

        branches = []
        for g in range(groups):
            branches.append(
                nn.Conv2d(c1, int(c_[g]), kernel_size=k[g], stride=s, padding=k[g] // 2, bias=False))
        self.m = nn.ModuleList(branches)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        """Concatenate every branch output on dim 1, apply BN and activation, then add the input."""
        mixed = torch.cat([conv(x) for conv in self.m], 1)
        return x + self.act(self.bn(mixed))
class Ensemble(nn.ModuleList):
    """Ensemble of models whose inference outputs are concatenated for a joint NMS."""

    def __init__(self):
        """Start with an empty module list; members are added with append()."""
        super().__init__()

    def forward(self, x, augment=False, profile=False, visualize=False):
        """Run every member on ``x`` and concatenate their first outputs along dim 1.

        Returns:
            (inference, train output) where the train output is always None.
        """
        outputs = [module(x, augment, profile, visualize)[0] for module in self]
        # Alternatives to concatenation: torch.stack(outputs).max(0)[0] (max ensemble)
        # or torch.stack(outputs).mean(0) (mean ensemble).
        merged = torch.cat(outputs, 1)  # nms ensemble
        return merged, None  # inference, train output
def attempt_load(weights, map_location=None, inplace=True, fuse=True):
    """Load a single model or an ensemble of models: weights=a, weights=[a] or weights=[a, b, c].

    Args:
        weights: one weights reference or a list of them; each goes through attempt_download().
        map_location: forwarded to torch.load().
        inplace: value assigned to the ``inplace`` attribute of activation, Detect and Model modules.
        fuse: when true, call ``.fuse()`` on every loaded model before ``.eval()``.

    Returns:
        The loaded model when exactly one was loaded; otherwise the Ensemble, with
        ``names`` taken from its last member and ``stride`` from the member with the largest stride.
    """
    from yolov5processor.models.yolo import Detect, Model

    model = Ensemble()
    weight_list = weights if isinstance(weights, list) else [weights]
    for w in weight_list:
        # NOTE(review): torch.load unpickles the file; only load checkpoints from trusted sources.
        ckpt = torch.load(attempt_download(w), map_location=map_location)  # load
        net = ckpt['ema' if ckpt.get('ema') else 'model'].float()  # EMA weights when present, as FP32
        if fuse:
            net = net.fuse()
        model.append(net.eval())

    # Compatibility updates
    inplace_types = [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]
    for m in model.modules():
        kind = type(m)  # exact type comparison, so subclasses are not matched
        if kind in inplace_types:
            m.inplace = inplace  # pytorch 1.7.0 compatibility
            if kind is Detect and not isinstance(m.anchor_grid, list):  # new Detect Layer compatibility
                delattr(m, 'anchor_grid')
                setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
        elif kind is Conv:
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

    if len(model) == 1:
        return model[-1]  # return model

    print(f'Ensemble created with {weights}\n')
    for k in ['names']:
        setattr(model, k, getattr(model[-1], k))
    max_strides = torch.tensor([m.stride.max() for m in model])
    model.stride = model[torch.argmax(max_strides).int()].stride  # max stride
    return model  # return ensemble
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Default anchors for COCO data
# P5 -------------------------------------------------------------------------------------------------------------------
# P5-640:
anchors_p5_640:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# P6 -------------------------------------------------------------------------------------------------------------------
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
anchors_p6_640:
- [9,11, 21,19, 17,41] # P3/8
- [43,32, 39,70, 86,64] # P4/16
- [65,131, 134,130, 120,265] # P5/32
- [282,180, 247,354, 512,387] # P6/64
# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
anchors_p6_1280:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
anchors_p6_1920:
- [28,41, 67,59, 57,141] # P3/8
- [144,103, 129,227, 270,205] # P4/16
- [209,452, 455,396, 358,812] # P5/32
- [653,922, 1109,570, 1387,1187] # P6/64
# P7 -------------------------------------------------------------------------------------------------------------------
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
anchors_p7_640:
- [11,11, 13,30, 29,20] # P3/8
- [30,46, 61,38, 39,92] # P4/16
- [78,80, 146,66, 79,163] # P5/32
- [149,150, 321,143, 157,303] # P6/64
- [257,402, 359,290, 524,372] # P7/128
# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
anchors_p7_1280:
- [19,22, 54,36, 32,77] # P3/8
- [70,83, 138,71, 75,173] # P4/16
- [165,159, 148,334, 375,151] # P5/32
- [334,317, 251,626, 499,474] # P6/64
- [750,326, 534,814, 1079,818] # P7/128
# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
anchors_p7_1920:
- [29,34, 81,55, 47,115] # P3/8
- [105,124, 207,107, 113,259] # P4/16
- [247,238, 222,500, 563,227] # P5/32
- [501,476, 376,939, 749,711] # P6/64
- [1126,489, 801,1222, 1618,1227] # P7/128
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3-SPP head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, SPP, [512, [5, 9, 13]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,14, 23,27, 37,58] # P4/16
- [81,82, 135,169, 344,319] # P5/32
# YOLOv3-tiny backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [16, 3, 1]], # 0
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
[-1, 1, Conv, [32, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
[-1, 1, Conv, [512, 3, 1]],
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
]
# YOLOv3-tiny head
head:
[[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3 head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, Conv, [512, [1, 1]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 BiFPN head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14, 6], 1, Concat, [1]], # cat P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 6, BottleneckCSP, [1024]], # 9
]
# YOLOv5 FPN head
head:
[[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [512, 1, 1]],
[-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 1, Conv, [256, 1, 1]],
[-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small)
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 2], 1, Concat, [1]], # cat backbone P2
[-1, 1, C3, [128, False]], # 21 (P2/4-xsmall)
[-1, 1, Conv, [128, 3, 2]],
[[-1, 18], 1, Concat, [1]], # cat head P3
[-1, 3, C3, [256, False]], # 24 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 27 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 30 (P5/32-large)
[[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 1, SPP, [1024, [3, 5, 7]]],
[-1, 3, C3, [1024, False]], # 11
]
# YOLOv5 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
[-1, 1, SPP, [1280, [3, 5]]],
[-1, 3, C3, [1280, False]], # 13
]
# YOLOv5 head
head:
[[-1, 1, Conv, [1024, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 10], 1, Concat, [1]], # cat backbone P6
[-1, 3, C3, [1024, False]], # 17
[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 21
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 25
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 29 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 26], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 32 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 22], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 35 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 18], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge)
[-1, 1, Conv, [1024, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P7
[-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge)
[[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, BottleneckCSP, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, BottleneckCSP, [1024, False]], # 9
]
# YOLOv5 PANet head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, BottleneckCSP, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3Ghost, [128]],
[-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3Ghost, [256]],
[-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3Ghost, [512]],
[-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3Ghost, [1024, False]], # 9
]
# YOLOv5 head
head:
[[-1, 1, GhostConv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3Ghost, [512, False]], # 13
[-1, 1, GhostConv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small)
[-1, 1, GhostConv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium)
[-1, 1, GhostConv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module
]
# YOLOv5 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
TensorFlow, Keras and TFLite versions of YOLOv5
Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127
Usage:
$ python models/tf.py --weights yolov5s.pt
Export:
$ python path/to/export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
"""
import argparse
import logging
import sys
from copy import deepcopy
from pathlib import Path
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
# ROOT = ROOT.relative_to(Path.cwd()) # relative
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
from tensorflow import keras
from yolov5processor.models.common import Bottleneck, BottleneckCSP, Concat, Conv, C3, DWConv, Focus, SPP, SPPF, autopad
from yolov5processor.models.experimental import CrossConv, MixConv2d, attempt_load
from yolov5processor.models.yolo import Detect
from yolov5processor.utils.general import make_divisible, print_args, set_logging
from yolov5processor.utils.activations import SiLU
LOGGER = logging.getLogger(__name__)
class TFBN(keras.layers.Layer):
    """Keras BatchNormalization layer seeded from a trained PyTorch batch-norm module."""

    def __init__(self, w=None):
        """Copy ``bias``, ``weight``, ``running_mean``, ``running_var`` and ``eps`` from ``w``."""
        super().__init__()
        constant = keras.initializers.Constant
        self.bn = keras.layers.BatchNormalization(
            beta_initializer=constant(w.bias.numpy()),
            gamma_initializer=constant(w.weight.numpy()),
            moving_mean_initializer=constant(w.running_mean.numpy()),
            moving_variance_initializer=constant(w.running_var.numpy()),
            epsilon=w.eps,
        )

    def call(self, inputs):
        """Apply the wrapped batch normalization to ``inputs``."""
        normalized = self.bn(inputs)
        return normalized
class TFPad(keras.layers.Layer):
    """Zero-pad the two middle axes of a 4-D tensor by ``pad`` on each side."""

    def __init__(self, pad):
        super().__init__()
        untouched = [0, 0]  # first and last axes are left as they are
        both_sides = [pad, pad]
        self.pad = tf.constant([untouched, both_sides, both_sides, untouched])

    def call(self, inputs):
        """Return ``inputs`` padded with constant zeros according to ``self.pad``."""
        return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
class TFConv(keras.layers.Layer):
    """TensorFlow version of the standard convolution block: conv, optional batch-norm, activation."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
        """Build the block from a trained PyTorch module ``w``.

        Arguments are ch_in, ch_out, kernel, stride, padding, groups, activation flag and weights.
        ``w.conv`` supplies the kernel (and the bias when ``w`` has no ``bn``); the type of
        ``w.act`` selects the matching TensorFlow activation.
        """
        super().__init__()
        assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
        assert isinstance(k, int), "Convolution with multiple kernels are not allowed."

        has_bn = hasattr(w, 'bn')
        unit_stride = s == 1

        # PyTorch stores conv kernels as (out, in, kh, kw); Keras expects (kh, kw, in, out).
        kernel = w.conv.weight.permute(2, 3, 1, 0).numpy()
        if has_bn:
            bias_init = 'zeros'
        else:
            bias_init = keras.initializers.Constant(w.conv.bias.numpy())

        # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
        # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
        # so strided convolutions use 'VALID' behind an explicit TFPad instead.
        conv = keras.layers.Conv2D(
            c2, k, s, 'SAME' if unit_stride else 'VALID',
            use_bias=not has_bn,
            kernel_initializer=keras.initializers.Constant(kernel),
            bias_initializer=bias_init)
        if unit_stride:
            self.conv = conv
        else:
            self.conv = keras.Sequential([TFPad(autopad(k, p)), conv])

        self.bn = TFBN(w.bn) if has_bn else tf.identity

        # YOLOv5 activations
        if isinstance(w.act, nn.LeakyReLU):
            activation = lambda x: keras.activations.relu(x, alpha=0.1)
        elif isinstance(w.act, nn.Hardswish):
            activation = lambda x: x * tf.nn.relu6(x + 3) * 0.166666667
        elif isinstance(w.act, (nn.SiLU, SiLU)):
            activation = lambda x: keras.activations.swish(x)
        else:
            raise Exception(f'no matching TensorFlow activation found for {w.act}')
        self.act = activation if act else tf.identity

    def call(self, inputs):
        """Run convolution, then batch-norm (or identity), then the activation."""
        convolved = self.conv(inputs)
        return self.act(self.bn(convolved))
class TFFocus(keras.layers.Layer):
    """Focus wh information into c-space: interleaved 2x2 sub-sampling followed by a convolution."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
        """Arguments are ch_in, ch_out, kernel, stride, padding, groups; ``w.conv`` holds the weights."""
        super().__init__()
        # The four sub-sampled copies are concatenated on the channel axis, hence 4x input channels.
        self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)

    def call(self, inputs):  # x(b,w,h,c) -> y(b,w/2,h/2,4c)
        """Stack the four stride-2 phases of ``inputs`` along axis 3 and convolve the result."""
        # inputs = inputs / 255.  # normalize 0-255 to 0-1
        even_even = inputs[:, ::2, ::2, :]
        odd_even = inputs[:, 1::2, ::2, :]
        even_odd = inputs[:, ::2, 1::2, :]
        odd_odd = inputs[:, 1::2, 1::2, :]
        stacked = tf.concat([even_even, odd_even, even_odd, odd_odd], 3)
        return self.conv(stacked)
class TFBottleneck(keras.layers.Layer):
    """Standard bottleneck: 1x1 conv, 3x3 conv, and an optional residual connection."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None):
        """Arguments are ch_in, ch_out, shortcut, groups, expansion; ``w`` provides ``cv1``/``cv2``."""
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = TFConv(c1, hidden, 1, 1, w=w.cv1)
        self.cv2 = TFConv(hidden, c2, 3, 1, g=g, w=w.cv2)
        # The residual is only used when requested and when the channel counts match.
        self.add = shortcut and c1 == c2

    def call(self, inputs):
        """Return ``cv2(cv1(inputs))``, plus ``inputs`` when the shortcut is enabled."""
        if self.add:
            return inputs + self.cv2(self.cv1(inputs))
        return self.cv2(self.cv1(inputs))
class TFConv2d(keras.layers.Layer):
    """Substitution for PyTorch nn.Conv2D: a plain 'VALID' Keras convolution with copied weights."""

    def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
        """Create the convolution with kernel (and optionally bias) taken from ``w``."""
        super().__init__()
        assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
        # PyTorch stores conv kernels as (out, in, kh, kw); Keras expects (kh, kw, in, out).
        kernel_init = keras.initializers.Constant(w.weight.permute(2, 3, 1, 0).numpy())
        if bias:
            bias_init = keras.initializers.Constant(w.bias.numpy())
        else:
            bias_init = None
        self.conv = keras.layers.Conv2D(
            c2, k, s, 'VALID',
            use_bias=bias,
            kernel_initializer=kernel_init,
            bias_initializer=bias_init)

    def call(self, inputs):
        """Apply the convolution to ``inputs``."""
        return self.conv(inputs)
class TFBottleneckCSP(keras.layers.Layer):
    """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks (TensorFlow version)."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
        """Arguments are ch_in, ch_out, number, shortcut, groups, expansion; ``w`` is the PyTorch module."""
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = TFConv(c1, hidden, 1, 1, w=w.cv1)
        self.cv2 = TFConv2d(c1, hidden, 1, 1, bias=False, w=w.cv2)
        self.cv3 = TFConv2d(hidden, hidden, 1, 1, bias=False, w=w.cv3)
        self.cv4 = TFConv(2 * hidden, c2, 1, 1, w=w.cv4)
        self.bn = TFBN(w.bn)
        self.act = lambda x: keras.activations.relu(x, alpha=0.1)
        stack = []
        for j in range(n):
            stack.append(TFBottleneck(hidden, hidden, shortcut, g, e=1.0, w=w.m[j]))
        self.m = keras.Sequential(stack)

    def call(self, inputs):
        """Concatenate the bottleneck branch with the bypass branch, then bn, activation and cv4."""
        deep_branch = self.cv3(self.m(self.cv1(inputs)))
        bypass_branch = self.cv2(inputs)
        merged = tf.concat((deep_branch, bypass_branch), axis=3)
        return self.cv4(self.act(self.bn(merged)))
class TFC3(keras.layers.Layer):
    """CSP Bottleneck with 3 convolutions (TensorFlow version)."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
        """Arguments are ch_in, ch_out, number, shortcut, groups, expansion; ``w`` is the PyTorch module."""
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = TFConv(c1, hidden, 1, 1, w=w.cv1)
        self.cv2 = TFConv(c1, hidden, 1, 1, w=w.cv2)
        self.cv3 = TFConv(2 * hidden, c2, 1, 1, w=w.cv3)
        stack = []
        for j in range(n):
            stack.append(TFBottleneck(hidden, hidden, shortcut, g, e=1.0, w=w.m[j]))
        self.m = keras.Sequential(stack)

    def call(self, inputs):
        """Concatenate the bottleneck branch and the cv2 branch on axis 3 and fuse them with cv3."""
        deep_branch = self.m(self.cv1(inputs))
        bypass_branch = self.cv2(inputs)
        return self.cv3(tf.concat((deep_branch, bypass_branch), axis=3))
class TFSPP(keras.layers.Layer):
    """Spatial pyramid pooling layer used in YOLOv3-SPP (TensorFlow version)."""

    def __init__(self, c1, c2, k=(5, 9, 13), w=None):
        """Create one stride-1 'SAME' max-pool per kernel size in ``k`` between two 1x1 convs."""
        super().__init__()
        hidden = c1 // 2  # hidden channels
        branches = len(k) + 1  # the un-pooled tensor plus one pooled copy per kernel size
        self.cv1 = TFConv(c1, hidden, 1, 1, w=w.cv1)
        self.cv2 = TFConv(hidden * branches, c2, 1, 1, w=w.cv2)
        self.m = [keras.layers.MaxPool2D(pool_size=size, strides=1, padding='SAME') for size in k]

    def call(self, inputs):
        """Concatenate ``cv1(inputs)`` with each pooled copy on axis 3 and apply cv2."""
        x = self.cv1(inputs)
        pyramid = [x]
        for pool in self.m:
            pyramid.append(pool(x))
        return self.cv2(tf.concat(pyramid, 3))
class TFSPPF(keras.layers.Layer):
    """Spatial pyramid pooling-Fast layer: one max-pool applied three times in sequence."""

    def __init__(self, c1, c2, k=5, w=None):
        """Create the two 1x1 convs from ``w.cv1``/``w.cv2`` and a single shared max-pool of size ``k``."""
        super().__init__()
        hidden = c1 // 2  # hidden channels
        self.cv1 = TFConv(c1, hidden, 1, 1, w=w.cv1)
        self.cv2 = TFConv(hidden * 4, c2, 1, 1, w=w.cv2)
        self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME')

    def call(self, inputs):
        """Concatenate ``cv1(inputs)`` with its once-, twice- and thrice-pooled versions, then cv2."""
        x = self.cv1(inputs)
        pooled_once = self.m(x)
        pooled_twice = self.m(pooled_once)
        pooled_thrice = self.m(pooled_twice)
        return self.cv2(tf.concat([x, pooled_once, pooled_twice, pooled_thrice], 3))
class TFDetect(keras.layers.Layer):
    """TensorFlow version of the YOLOv5 detection head.

    One 1x1 convolution per detection level produces ``na * no`` channels. In inference
    mode the raw outputs are decoded to xywh boxes normalized by the image size.
    """

    def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None):  # detection layer
        """Build the head from the trained PyTorch ``Detect`` module ``w``.

        Args:
            nc: number of classes.
            anchors: per-level anchor lists; only their count and length are read here,
                the anchor values themselves come from ``w.anchors``.
            ch: input channel count of each detection level.
            imgsz: fixed inference size (h, w) used to precompute the grids.
            w: PyTorch module providing ``stride``, ``anchors`` and the conv weights ``m``.
        """
        super(TFDetect, self).__init__()
        self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [tf.zeros(1)] * self.nl  # init grid
        self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
        self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]),
                                      [self.nl, 1, -1, 1, 2])
        self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
        self.training = False  # set to False after building model
        self.imgsz = imgsz
        # The image size is fixed, so every level's grid can be built once up front.
        for i in range(self.nl):
            ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
            self.grid[i] = self._make_grid(nx, ny)

    def call(self, inputs):
        """Run the head on the list of per-level feature maps.

        Returns the list of reshaped raw outputs when ``self.training`` is true, otherwise
        a tuple ``(decoded, raw)`` where ``decoded`` concatenates all levels on axis 1.
        """
        z = []  # inference output
        x = []
        for i in range(self.nl):
            x.append(self.m[i](inputs[i]))
            # x(bs,20,20,255) to x(bs,3,20,20,85)
            ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
            x[i] = tf.transpose(tf.reshape(x[i], [-1, ny * nx, self.na, self.no]), [0, 2, 1, 3])

            if not self.training:  # inference
                y = tf.sigmoid(x[i])
                xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]
                # Normalize xywh to 0-1 to reduce calibration error
                xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
                wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
                y = tf.concat([xy, wh, y[..., 4:]], -1)
                # Flatten anchors and grid cells together. Use self.na rather than a literal 3
                # so models with a different number of anchors per level reshape correctly.
                z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))

        return x if self.training else (tf.concat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """Return a float32 tensor of shape (1, 1, ny * nx, 2) holding the (x, y) index of each cell."""
        # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
        xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
        return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
class TFUpsample(keras.layers.Layer):
    """Fixed 2x spatial upsampling implemented with ``tf.image.resize``."""

    def __init__(self, size, scale_factor, mode, w=None):  # warning: all arguments needed including 'w'
        """Only ``scale_factor == 2`` is supported; ``mode`` is passed on as the resize method."""
        super().__init__()
        assert scale_factor == 2, "scale_factor must be 2"

        def double_size(x):
            height, width = x.shape[1], x.shape[2]
            return tf.image.resize(x, (height * 2, width * 2), method=mode)

        self.upsample = double_size
        # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
        # with default arguments: align_corners=False, half_pixel_centers=False
        # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
        #                                                            size=(x.shape[1] * 2, x.shape[2] * 2))

    def call(self, inputs):
        """Return ``inputs`` resized to twice its size along axes 1 and 2."""
        return self.upsample(inputs)
class TFConcat(keras.layers.Layer):
    """Channel concatenation: the PyTorch NCHW axis 1 becomes axis 3 in NHWC."""

    def __init__(self, dimension=1, w=None):
        """Accept only ``dimension == 1``; ``w`` is unused and kept for a uniform constructor."""
        super().__init__()
        assert dimension == 1, "convert only NCHW to NHWC concat"
        self.d = 3  # channel axis in NHWC

    def call(self, inputs):
        """Concatenate the list of tensors ``inputs`` along axis ``self.d``."""
        joined = tf.concat(inputs, self.d)
        return joined
def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)
    """Build the Keras layer stack described by the model dict ``d``.

    Args:
        d: model dict with keys 'anchors', 'nc', 'depth_multiple', 'width_multiple',
            'backbone' and 'head'.
        ch: list of channel counts; it is appended to in place, one entry per layer.
        model: trained PyTorch model; ``model.model[i]`` supplies the weights of layer ``i``.
        imgsz: inference size, appended to the arguments of the Detect layer.

    Returns:
        A tuple ``(keras.Sequential(layers), sorted(save))`` where ``save`` lists the layer
        indices that other layers read from.
    """
    LOGGER.info('\n%3s%18s%3s%10s  %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
    # NOTE: the names `anchors` and `nc` must stay as they are; the eval() calls below
    # resolve the YAML strings 'anchors' and 'nc' against these local variables.
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m_str = m  # keep the original name to look up the matching TF* class below
        # NOTE(review): eval() runs on strings from the model config; only load trusted configs.
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except NameError:
                pass  # strings that are not names (e.g. 'nearest') stay as plain strings

        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in [nn.Conv2d, Conv, Bottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
            c1, c2 = ch[f], args[0]
            c2 = make_divisible(c2 * gw, 8) if c2 != no else c2  # width gain, except on output layers

            args = [c1, c2, *args[1:]]
            if m in [BottleneckCSP, C3]:
                args.insert(2, n)  # these modules take the repeat count as an argument
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            # ch is never reset in this function, so the output of layer x sits at ch[x + 1]
            # (presumably ch[0] is the input channel count; confirm against the caller).
            c2 = sum([ch[-1 if x == -1 else x + 1] for x in f])
        elif m is Detect:
            args.append([ch[x + 1] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
            args.append(imgsz)
        else:
            c2 = ch[f]

        tf_m = eval('TF' + m_str.replace('nn.', ''))  # e.g. 'nn.Upsample' -> TFUpsample
        m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \
            else tf_m(*args, w=model.model[i])  # module

        # A fresh PyTorch module is built only so its parameters can be counted for the log.
        torch_m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum([x.numel() for x in torch_m_.parameters()])  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info('%3s%18s%3s%10.0f  %-40s%-30s' % (i, f, n, np, t, args))  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        ch.append(c2)
    return keras.Sequential(layers), sorted(save)
class TFModel:
    """TensorFlow/Keras re-creation of a YOLOv5 model, assembled by ``parse_model``."""

    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)):  # model, channels, classes
        """Load the model dict from ``cfg`` (a dict or a YAML path) and build the Keras layers.

        ``nc`` overrides the class count in the config when it is given and differs;
        ``model`` is the trained PyTorch model whose weights are copied.
        """
        super().__init__()
        if not isinstance(cfg, dict):  # is *.yaml
            import yaml  # for torch hub
            self.yaml_file = Path(cfg).name
            with open(cfg) as f:
                # NOTE(review): FullLoader is more permissive than yaml.safe_load; only load
                # trusted config files here.
                self.yaml = yaml.load(f, Loader=yaml.FullLoader)  # model dict
        else:
            self.yaml = cfg  # model dict

        # Define model
        if nc and nc != self.yaml['nc']:
            print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc))
            self.yaml['nc'] = nc  # override yaml value
        built = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
        self.model, self.savelist = built

    def predict(self, inputs, tf_nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45,
                conf_thres=0.25):
        """Run the layers on ``inputs`` and optionally apply TensorFlow NMS.

        Without ``tf_nms`` only the first output tensor is returned
        ([1,6300,85] = [xywh, conf, class0, class1, ...]). With ``tf_nms`` the result is
        ``(nms, raw)`` where ``raw`` is the second element of the final layer's output.
        """
        saved = []  # outputs of the layers listed in self.savelist, None elsewhere
        x = inputs
        for layer in self.model.layers:
            source = layer.f
            if source != -1:  # if not from previous layer
                if isinstance(source, int):
                    x = saved[source]
                else:
                    x = [x if j == -1 else saved[j] for j in source]  # from earlier layers
            x = layer(x)  # run
            saved.append(x if layer.i in self.savelist else None)  # save output

        if not tf_nms:
            return x[0]  # output only first tensor [1,6300,85] = [xywh, conf, class0, class1, ...]
            # x = x[0][0]  # [x(1,6300,85), ...] to x(6300,85)
            # xywh = x[..., :4]  # x(6300,4) boxes
            # conf = x[..., 4:5]  # x(6300,1) confidences
            # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1))  # x(6300,1) classes
            # return tf.concat([conf, cls, xywh], 1)

        # Add TensorFlow NMS
        boxes = self._xywh2xyxy(x[0][..., :4])
        probs = x[0][:, :, 4:5]
        classes = x[0][:, :, 5:]
        scores = probs * classes
        if agnostic_nms:
            nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
        else:
            boxes = tf.expand_dims(boxes, 2)
            nms = tf.image.combined_non_max_suppression(
                boxes, scores, topk_per_class, topk_all, iou_thres, conf_thres, clip_boxes=False)
        return nms, x[1]

    @staticmethod
    def _xywh2xyxy(xywh):
        """Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] (xy1=top-left, xy2=bottom-right)."""
        x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
        corners = [x - w / 2, y - h / 2, x + w / 2, y + h / 2]
        return tf.concat(corners, axis=-1)
class AgnosticNMS(keras.layers.Layer):
    """TF Agnostic NMS: class-independent non-max suppression applied per batch element."""

    def call(self, input, topk_all, iou_thres, conf_thres):
        """Map ``_nms`` over the batch of ``(boxes, classes, scores)`` tensors in ``input``."""
        # wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450
        def per_image(x):
            return self._nms(x, topk_all, iou_thres, conf_thres)

        output_signature = (tf.float32, tf.float32, tf.float32, tf.int32)
        return tf.map_fn(per_image, input,
                         fn_output_signature=output_signature,
                         name='agnostic_nms')

    @staticmethod
    def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25):  # agnostic NMS
        """Suppress overlapping boxes for one image and pad every result to ``topk_all`` rows.

        Returns ``(boxes, scores, classes, valid_detections)``; boxes are padded with 0.0,
        scores and classes with -1.0.
        """
        boxes, classes, scores = x
        class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
        scores_inp = tf.reduce_max(scores, -1)
        keep = tf.image.non_max_suppression(
            boxes, scores_inp, max_output_size=topk_all, iou_threshold=iou_thres, score_threshold=conf_thres)

        selected_boxes = tf.gather(boxes, keep)
        selected_scores = tf.gather(scores_inp, keep)
        selected_classes = tf.gather(class_inds, keep)

        # Number of rows needed to bring each output up to the fixed length topk_all.
        missing = topk_all - tf.shape(selected_boxes)[0]
        padded_boxes = tf.pad(selected_boxes,
                              paddings=[[0, missing], [0, 0]],
                              mode="CONSTANT", constant_values=0.0)
        padded_scores = tf.pad(selected_scores,
                               paddings=[[0, missing]],
                               mode="CONSTANT", constant_values=-1.0)
        padded_classes = tf.pad(selected_classes,
                                paddings=[[0, missing]],
                                mode="CONSTANT", constant_values=-1.0)
        valid_detections = tf.shape(keep)[0]
        return padded_boxes, padded_scores, padded_classes, valid_detections
def representative_dataset_gen(dataset, ncalib=100):
    """Yield calibration samples for use with ``converter.representative_dataset``.

    Args:
        dataset: iterable of ``(path, img, im0s, vid_cap)`` tuples; ``img`` is an array
            whose axes are reordered with ``[1, 2, 0]`` (presumably CHW to HWC).
        ncalib: maximum number of samples to yield.

    Yields:
        One-element lists holding a float32 array with a leading batch axis of size 1
        and values divided by 255.
    """
    from itertools import islice  # local import keeps this block self-contained

    # islice stops after exactly ncalib items; the previous post-yield `n >= ncalib` check
    # produced ncalib + 1 samples and always pulled at least one.
    for path, img, im0s, vid_cap in islice(dataset, max(ncalib, 0)):
        sample = np.transpose(img, [1, 2, 0])
        sample = np.expand_dims(sample, axis=0).astype(np.float32)
        sample /= 255.0
        yield [sample]
def run(weights=ROOT / 'yolov5s.pt',  # weights path
        imgsz=(640, 640),  # inference size h,w
        batch_size=1,  # batch size
        dynamic=False,  # dynamic batch size
        ):
    """Load PyTorch weights, rebuild the network in TensorFlow and print a Keras summary.

    Each stage (PyTorch, TensorFlow, Keras) is exercised once with an all-zero or
    symbolic input of the requested size.
    """
    # PyTorch model
    torch_input = torch.zeros((batch_size, 3, *imgsz))  # BCHW image
    model = attempt_load(weights, map_location=torch.device('cpu'), inplace=True, fuse=False)
    _ = model(torch_input)  # inference
    model.info()

    # TensorFlow model
    tf_input = tf.zeros((batch_size, *imgsz, 3))  # BHWC image
    tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
    _ = tf_model.predict(tf_input)  # inference

    # Keras model
    keras_batch = None if dynamic else batch_size
    keras_input = keras.Input(shape=(*imgsz, 3), batch_size=keras_batch)
    keras_model = keras.Model(inputs=keras_input, outputs=tf_model.predict(keras_input))
    keras_model.summary()
def parse_opt():
    """Parse the command-line options, expand a single image size to (h, w) and print them."""
    parser = argparse.ArgumentParser()
    add_option = parser.add_argument
    add_option('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
    add_option('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    add_option('--batch-size', type=int, default=1, help='batch size')
    add_option('--dynamic', action='store_true', help='dynamic batch size')
    opt = parser.parse_args()
    if len(opt.imgsz) == 1:
        opt.imgsz *= 2  # expand
    print_args(FILE.stem, opt)
    return opt
def main(opt):
    """Configure logging, then call ``run`` with the parsed options as keyword arguments."""
    set_logging()
    options = vars(opt)
    run(**options)
if __name__ == "__main__":
    # Script entry point: parse the CLI options and hand them to main().
    main(parse_opt())
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
YOLO-specific modules
Usage:
$ python path/to/models/yolo.py --cfg yolov5s.yaml
"""
import argparse
import sys
from copy import deepcopy
from pathlib import Path
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
# ROOT = ROOT.relative_to(Path.cwd()) # relative
from yolov5processor.models.common import *
from yolov5processor.models.experimental import *
from yolov5processor.utils.autoanchor import check_anchor_order
from yolov5processor.utils.general import check_yaml, make_divisible, print_args, set_logging
from yolov5processor.utils.plots import feature_visualization
from yolov5processor.utils.torch_utils import copy_attr, fuse_conv_and_bn, initialize_weights, model_info, scale_img, \
select_device, time_sync
try:
import thop # for FLOPs computation
except ImportError:
thop = None
LOGGER = logging.getLogger(__name__)
class Detect(nn.Module):
    """YOLOv5 detection head: one 1x1 convolution per level plus box decoding at inference."""

    stride = None  # strides computed during build
    onnx_dynamic = False  # ONNX export parameter

    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):  # detection layer
        """Create the output convolutions and register the anchors as a buffer.

        Args:
            nc: number of classes.
            anchors: one flat list of anchor sizes per detection layer.
            ch: input channel count of each detection layer.
            inplace: use in-place ops (e.g. slice assignment) when decoding.
        """
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # init grid
        self.anchor_grid = [torch.zeros(1)] * self.nl  # init anchor grid
        anchor_tensor = torch.tensor(anchors).float().view(self.nl, -1, 2)  # shape(nl,na,2)
        self.register_buffer('anchors', anchor_tensor)
        out_channels = self.no * self.na
        self.m = nn.ModuleList(nn.Conv2d(c_in, out_channels, 1) for c_in in ch)  # output conv
        self.inplace = inplace  # use in-place ops (e.g. slice assignment)

    def forward(self, x):
        """Apply the head to the list of feature maps ``x`` (modified in place).

        Returns ``x`` in training mode, otherwise ``(torch.cat(z, 1), x)`` where ``z``
        holds the decoded predictions of every level.
        """
        z = []  # inference output
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if self.training:
                continue

            # inference: rebuild the cached grids when the feature-map size changed
            if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
                self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

            y = x[i].sigmoid()
            if self.inplace:
                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
            else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                y = torch.cat((xy, wh, y[..., 4:]), -1)
            z.append(y.view(bs, -1, self.no))

        if self.training:
            return x
        return torch.cat(z, 1), x

    def _make_grid(self, nx=20, ny=20, i=0):
        """Return the cell-index grid and the stride-scaled anchor grid for level ``i``.

        Both tensors have shape (1, na, ny, nx, 2) and live on the anchors' device.
        """
        device = self.anchors[i].device
        rows = torch.arange(ny).to(device)
        cols = torch.arange(nx).to(device)
        yv, xv = torch.meshgrid([rows, cols])
        full_shape = (1, self.na, ny, nx, 2)
        grid = torch.stack((xv, yv), 2).expand(full_shape).float()
        scaled_anchors = self.anchors[i].clone() * self.stride[i]
        anchor_grid = scaled_anchors.view((1, self.na, 1, 1, 2)).expand(full_shape).float()
        return grid, anchor_grid
class Model(nn.Module):
    """YOLOv5 PyTorch model assembled from a model dict or YAML file by ``parse_model``."""

    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, input channels, number of classes
        """Load the configuration, build the layers and initialise strides, anchors and biases.

        Args:
            cfg: model dict, or path to a YAML file describing the model.
            ch: input channels, used when the config has no 'ch' entry.
            nc: optional class count overriding the config value.
            anchors: optional value overriding the config anchors (stored after ``round``).
        """
        super().__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg  # model dict
        else:  # is *.yaml
            import yaml  # for torch hub
            self.yaml_file = Path(cfg).name
            with open(cfg, errors='ignore') as f:
                self.yaml = yaml.safe_load(f)  # model dict

        # Define model
        ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
        if nc and nc != self.yaml['nc']:
            LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc  # override yaml value
        if anchors:
            LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
            self.yaml['anchors'] = round(anchors)  # override yaml value
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
        self.names = [str(i) for i in range(self.yaml['nc'])]  # default names
        self.inplace = self.yaml.get('inplace', True)

        # Build strides, anchors
        m = self.model[-1]  # Detect()
        if isinstance(m, Detect):
            s = 256  # 2x min stride
            m.inplace = self.inplace
            # A dummy forward pass on an s x s image gives each output's height; s / height is its stride.
            m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
            m.anchors /= m.stride.view(-1, 1, 1)  # divide each level's anchors by that level's stride
            check_anchor_order(m)
            self.stride = m.stride
            self._initialize_biases()  # only run once

        # Init weights, biases
        initialize_weights(self)
        self.info()
        LOGGER.info('')

    def forward(self, x, augment=False, profile=False, visualize=False):
        """Run augmented inference when ``augment`` is set, otherwise a single forward pass."""
        if augment:
            return self._forward_augment(x)  # augmented inference, None
        return self._forward_once(x, profile, visualize)  # single-scale inference, train

    def _forward_augment(self, x):
        """Run inference at three scales (one of them flipped left-right) and merge the results.

        Returns ``(torch.cat(y, 1), None)``.
        """
        img_size = x.shape[-2:]  # height, width
        s = [1, 0.83, 0.67]  # scales
        f = [None, 3, None]  # flips (2-ud, 3-lr)
        y = []  # outputs
        for si, fi in zip(s, f):
            xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
            yi = self._forward_once(xi)[0]  # forward
            # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
            yi = self._descale_pred(yi, fi, si, img_size)
            y.append(yi)
        y = self._clip_augmented(y)  # clip augmented tails
        return torch.cat(y, 1), None  # augmented inference, train

    def _forward_once(self, x, profile=False, visualize=False):
        """Run the layers in order, routing inputs by each layer's ``f`` attribute.

        Outputs of layers listed in ``self.save`` are kept so later layers can read them.
        """
        y, dt = [], []  # outputs
        for m in self.model:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
            if profile:
                self._profile_one_layer(m, x, dt)
            x = m(x)  # run
            y.append(x if m.i in self.save else None)  # save output
            if visualize:
                feature_visualization(x, m.type, m.i, save_dir=visualize)
        return x

    def _descale_pred(self, p, flips, scale, img_size):
        """Undo the scaling and flip applied to the input before augmented inference.

        ``flips`` is 2 for up-down, 3 for left-right; ``img_size`` is (height, width).
        """
        # de-scale predictions following augmented inference (inverse operation)
        if self.inplace:
            p[..., :4] /= scale  # de-scale
            if flips == 2:
                p[..., 1] = img_size[0] - p[..., 1]  # de-flip ud
            elif flips == 3:
                p[..., 0] = img_size[1] - p[..., 0]  # de-flip lr
        else:
            x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale  # de-scale
            if flips == 2:
                y = img_size[0] - y  # de-flip ud
            elif flips == 3:
                x = img_size[1] - x  # de-flip lr
            p = torch.cat((x, y, wh, p[..., 4:]), -1)
        return p

    def _clip_augmented(self, y):
        """Drop the last ``i`` predictions of the first output and the first ``i`` of the last output."""
        # Clip YOLOv5 augmented inference tails
        nl = self.model[-1].nl  # number of detection layers (P3-P5)
        g = sum(4 ** x for x in range(nl))  # grid points
        e = 1  # exclude layer count
        i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e))  # indices
        y[0] = y[0][:, :-i]  # large
        i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e))  # indices
        y[-1] = y[-1][:, i:]  # small
        return y

    def _profile_one_layer(self, m, x, dt):
        """Log timing, GFLOPs and parameter count for layer ``m`` and append its time to ``dt``."""
        c = isinstance(m, Detect)  # is final layer, copy input as inplace fix
        o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPs
        t = time_sync()
        for _ in range(10):
            m(x.copy() if c else x)
        # 10 runs scaled by 100: per-run milliseconds, assuming time_sync() returns seconds - TODO confirm
        dt.append((time_sync() - t) * 100)
        if m == self.model[0]:
            LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s}  {'module'}")
        LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f}  {m.type}')
        if c:
            LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s}  Total")

    def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
        """Add objectness and class priors to the bias of every Detect output convolution."""
        # https://arxiv.org/abs/1708.02002 section 3.3
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
        m = self.model[-1]  # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
            b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

    def _print_biases(self):
        """Log the mean bias of the first five outputs and of the remaining outputs per Detect conv."""
        m = self.model[-1]  # Detect() module
        for mi in m.m:  # from
            b = mi.bias.detach().view(m.na, -1).T  # conv.bias(255) to (3,85)
            LOGGER.info(
                ('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))

    # def _print_weights(self):
    #     for m in self.model.modules():
    #         if type(m) is Bottleneck:
    #             LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2))  # shortcut weights

    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
        """Fold each batch-norm into its convolution for Conv/DWConv modules and return ``self``."""
        LOGGER.info('Fusing layers... ')
        for m in self.model.modules():
            if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                delattr(m, 'bn')  # remove batchnorm
                m.forward = m.forward_fuse  # update forward
        self.info()
        return self

    def autoshape(self):  # add AutoShape module
        """Wrap the model in ``AutoShape`` and copy selected attributes onto the wrapper."""
        LOGGER.info('Adding AutoShape... ')
        m = AutoShape(self)  # wrap model
        copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=())  # copy attributes
        return m

    def info(self, verbose=False, img_size=640):  # print model information
        """Print model information via ``model_info``."""
        model_info(self, verbose, img_size)

    def _apply(self, fn):
        """Extend ``nn.Module._apply`` to the Detect tensors that are not parameters or buffers."""
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
        m = self.model[-1]  # Detect()
        if isinstance(m, Detect):
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
                m.anchor_grid = list(map(fn, m.anchor_grid))
        return self
def parse_model(d, ch):  # model_dict, input_channels(3)
    """Build the PyTorch layer stack described by the model dict ``d``.

    Args:
        d: model dict with keys 'anchors', 'nc', 'depth_multiple', 'width_multiple',
            'backbone' and 'head'.
        ch: list whose last entry is the input channel count.

    Returns:
        A tuple ``(nn.Sequential(*layers), sorted(save))`` where ``save`` lists the layer
        indices that other layers read from.
    """
    LOGGER.info('\n%3s%18s%3s%10s  %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
    # NOTE: the names `anchors` and `nc` must stay as they are; the eval() calls below
    # resolve the YAML strings 'anchors' and 'nc' against these local variables.
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        # NOTE(review): eval() runs on strings from the model config; only load trusted configs.
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except NameError:
                pass  # strings that are not names (e.g. 'nearest') stay as plain strings

        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
                 BottleneckCSP, C3, C3TR, C3SPP, C3Ghost]:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)

            args = [c1, c2, *args[1:]]
            if m in [BottleneckCSP, C3, C3TR, C3Ghost]:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum([ch[x] for x in f])
        elif m is Detect:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum([x.numel() for x in m_.parameters()])  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info('%3s%18s%3s%10.0f  %-40s%-30s' % (i, f, n_, np, t, args))  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []  # drop the input-channel entry so that ch[x] is the output of layer x
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
if __name__ == '__main__':
    # Command-line entry point: build a model from a YAML config and optionally profile it.
    parser = argparse.ArgumentParser()
    for flag, options in (
            ('--cfg', dict(type=str, default='yolov5s.yaml', help='model.yaml')),
            ('--device', dict(default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')),
            ('--profile', dict(action='store_true', help='profile model speed')),
    ):
        parser.add_argument(flag, **options)
    opt = parser.parse_args()
    opt.cfg = check_yaml(opt.cfg)  # check YAML
    print_args(FILE.stem, opt)
    set_logging()
    device = select_device(opt.device)

    # Create model
    model = Model(opt.cfg).to(device)
    model.train()

    # Profile
    if opt.profile:
        batch = 8 if torch.cuda.is_available() else 1
        img = torch.rand(batch, 3, 640, 640).to(device)
        y = model(img, profile=True)

    # Tensorboard (not working https://github.com/ultralytics/yolov5/issues/2898)
    # from torch.utils.tensorboard import SummaryWriter
    # tb_writer = SummaryWriter('.')
    # LOGGER.info("Run 'tensorboard --logdir=models' to view tensorboard at http://localhost:6006/")
    # tb_writer.add_graph(torch.jit.trace(model, img, strict=False), [])  # add model graph
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Activation functions
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
# SiLU https://arxiv.org/pdf/1606.08415.pdf ----------------------------------------------------------------------------
class SiLU(nn.Module):
    """Export-friendly stand-in for ``nn.SiLU()``: computes ``x * sigmoid(x)``."""

    @staticmethod
    def forward(x):
        """Return the element-wise SiLU of tensor ``x``."""
        gate = torch.sigmoid(x)
        return x * gate
class Hardswish(nn.Module):
    """Export-friendly stand-in for ``nn.Hardswish()``."""

    @staticmethod
    def forward(x):
        """Return ``x * hardtanh(x + 3, 0, 6) / 6`` element-wise."""
        # Alternative kept from the original for torchscript and CoreML only:
        # return x * F.hardsigmoid(x)
        clipped = F.hardtanh(x + 3, 0., 6.)  # this form also works for ONNX
        return x * clipped / 6.
# Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
class Mish(nn.Module):
    """Mish activation: ``x * tanh(softplus(x))``."""

    @staticmethod
    def forward(x):
        """Return the element-wise Mish of tensor ``x``."""
        soft = F.softplus(x)
        return x * soft.tanh()
class MemoryEfficientMish(nn.Module):
    """Mish activation with a hand-written backward pass.

    Only the input tensor is saved in the forward pass; the softplus/tanh
    intermediates are recomputed during backward.
    """

    class F(torch.autograd.Function):
        # NOTE: inside these methods the bare name ``F`` resolves to the
        # module-level ``torch.nn.functional`` alias, not to this nested class.

        @staticmethod
        def forward(ctx, x):
            """Compute x * tanh(ln(1 + exp(x))) and stash ``x`` for backward."""
            ctx.save_for_backward(x)
            squashed = torch.tanh(F.softplus(x))
            return x.mul(squashed)

        @staticmethod
        def backward(ctx, grad_output):
            """Return the gradient of Mish w.r.t. the saved input, scaled by ``grad_output``."""
            (x,) = ctx.saved_tensors
            sx = torch.sigmoid(x)
            fx = F.softplus(x).tanh()
            local_grad = fx + x * sx * (1 - fx * fx)
            return grad_output * local_grad

    def forward(self, x):
        """Apply the custom autograd function to ``x``."""
        return self.F.apply(x)
# FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
class FReLU(nn.Module):
    """FReLU activation: element-wise max of the input and a depthwise conv + BN of it.

    Args:
        c1: number of input (and output) channels.
        k: kernel size of the depthwise convolution. Odd values keep the
           spatial size unchanged so the max with the input is well defined.
    """

    def __init__(self, c1, k=3):  # ch_in, kernel
        super().__init__()
        # Padding follows the kernel size (k // 2); the previous hard-coded padding of 1
        # only preserved the spatial size for k=3 and broke torch.max for other kernels.
        self.conv = nn.Conv2d(c1, c1, k, 1, k // 2, groups=c1, bias=False)
        self.bn = nn.BatchNorm2d(c1)

    def forward(self, x):
        """Return ``max(x, bn(conv(x)))`` element-wise."""
        return torch.max(x, self.bn(self.conv(x)))
# ACON https://arxiv.org/pdf/2009.04759.pdf ----------------------------------------------------------------------------
class AconC(nn.Module):
    r""" ACON activation (activate or not).
    AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, where p1, p2 and beta
    are learnable per-channel parameters, following
    "Activate or Not: Learning Customized Activation" <https://arxiv.org/pdf/2009.04759.pdf>.
    """

    def __init__(self, c1):
        super().__init__()
        param_shape = (1, c1, 1, 1)  # one value per channel, broadcast over the other dims
        self.p1 = nn.Parameter(torch.randn(*param_shape))
        self.p2 = nn.Parameter(torch.randn(*param_shape))
        self.beta = nn.Parameter(torch.ones(*param_shape))

    def forward(self, x):
        """Apply the AconC formula to ``x``."""
        delta = (self.p1 - self.p2) * x
        gate = torch.sigmoid(self.beta * delta)
        return delta * gate + self.p2 * x
class MetaAconC(nn.Module):
    r""" ACON activation (activate or not).
    MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, where beta is produced
    by a small two-conv network from the spatial mean of the input, following
    "Activate or Not: Learning Customized Activation" <https://arxiv.org/pdf/2009.04759.pdf>.
    """

    def __init__(self, c1, k=1, s=1, r=16):  # ch_in, kernel, stride, r
        super().__init__()
        hidden = max(r, c1 // r)  # width of the beta-generating bottleneck
        self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
        self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
        self.fc1 = nn.Conv2d(c1, hidden, k, s, bias=True)
        self.fc2 = nn.Conv2d(hidden, c1, k, s, bias=True)
        # BatchNorm layers intentionally absent (see note in forward):
        # self.bn1 = nn.BatchNorm2d(hidden)
        # self.bn2 = nn.BatchNorm2d(c1)

    def forward(self, x):
        """Apply the MetaAconC formula to ``x``."""
        pooled = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True)
        # batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891
        # The BN variant below was removed as a patch for that issue:
        # beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(pooled)))))
        beta = torch.sigmoid(self.fc2(self.fc1(pooled)))
        delta = (self.p1 - self.p2) * x
        return delta * torch.sigmoid(beta * delta) + self.p2 * x
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Image augmentation functions
"""
import logging
import math
import random
import cv2
import numpy as np
from yolov5processor.utils.general import colorstr, segment2box, resample_segments, check_version
from yolov5processor.utils.metrics import bbox_ioa
class Albumentations:
    """Optional Albumentations pipeline; stays inactive when the package cannot be imported."""

    def __init__(self):
        self.transform = None  # remains None unless the pipeline below is built successfully
        try:
            import albumentations as A
            check_version(A.__version__, '1.0.3')  # version requirement
            pipeline = [
                A.Blur(p=0.01),
                A.MedianBlur(p=0.01),
                A.ToGray(p=0.01),
                A.CLAHE(p=0.01),
                A.RandomBrightnessContrast(p=0.0),
                A.RandomGamma(p=0.0),
                A.ImageCompression(quality_lower=75, p=0.0)]
            bbox_cfg = A.BboxParams(format='yolo', label_fields=['class_labels'])
            self.transform = A.Compose(pipeline, bbox_params=bbox_cfg)
            # Log only the transforms whose probability is non-zero
            prefix = colorstr('albumentations: ')
            logging.info(prefix + ', '.join(f'{t}' for t in self.transform.transforms if t.p))
        except ImportError:  # package not installed, skip
            pass
        except Exception as e:
            logging.info(colorstr('albumentations: ') + f'{e}')

    def __call__(self, im, labels, p=1.0):
        """Apply the pipeline with probability ``p``.

        ``labels`` is indexed as column 0 = class and columns 1: = box, which are
        passed to the transform as ``class_labels`` and ``bboxes``. Returns
        ``(im, labels)``, unchanged when the pipeline is inactive or not drawn.
        """
        if not self.transform or random.random() >= p:
            return im, labels
        out = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0])  # transformed
        rows = [[cls, *box] for cls, box in zip(out['class_labels'], out['bboxes'])]
        return out['image'], np.array(rows)
def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
    """Randomly scale hue, saturation and value of a BGR image, in place.

    Each channel is multiplied by a random gain drawn from ``1 +/- gain`` via a
    256-entry lookup table. The result is written back into ``im``; nothing is
    returned. With all gains falsy the function does nothing.
    """
    if not (hgain or sgain or vgain):
        return
    gains = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
    hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
    dtype = im.dtype  # uint8

    levels = np.arange(0, 256, dtype=gains.dtype)
    lut_hue = ((levels * gains[0]) % 180).astype(dtype)  # hue wraps at 180
    lut_sat = np.clip(levels * gains[1], 0, 255).astype(dtype)
    lut_val = np.clip(levels * gains[2], 0, 255).astype(dtype)

    channels = (cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))
    cv2.cvtColor(cv2.merge(channels), cv2.COLOR_HSV2BGR, dst=im)  # no return needed
def hist_equalize(im, clahe=True, bgr=False):
    """Equalize the luminance histogram of image ``im`` (shape (n, m, 3), range 0-255).

    The image is converted to YUV, the Y channel is equalized (CLAHE when
    ``clahe`` is true, plain histogram equalization otherwise) and the result
    is converted back. ``bgr`` selects BGR instead of RGB channel order.
    """
    if bgr:
        to_yuv, from_yuv = cv2.COLOR_BGR2YUV, cv2.COLOR_YUV2BGR
    else:
        to_yuv, from_yuv = cv2.COLOR_RGB2YUV, cv2.COLOR_YUV2RGB

    yuv = cv2.cvtColor(im, to_yuv)
    if clahe:
        equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        yuv[:, :, 0] = equalizer.apply(yuv[:, :, 0])
    else:
        yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])  # equalize Y channel histogram
    return cv2.cvtColor(yuv, from_yuv)  # convert YUV image back
def replicate(im, labels):
    """Copy the smaller half of the labelled boxes to random positions in the image.

    ``labels`` rows are read as (cls, x1, y1, x2, y2) in pixels. The image is
    modified in place; a new label row is appended for every pasted patch.
    Returns ``(im, labels)``.
    """
    img_h, img_w = im.shape[:2]
    boxes = labels[:, 1:].astype(int)
    side = ((boxes[:, 2] - boxes[:, 0]) + (boxes[:, 3] - boxes[:, 1])) / 2  # side length (pixels)
    n_small = round(side.size * 0.5)
    for idx in side.argsort()[:n_small]:  # smallest indices
        sx1, sy1, sx2, sy2 = boxes[idx]
        box_h = sy2 - sy1
        box_w = sx2 - sx1
        # offset y first, then x (order matters for the random stream)
        top = int(random.uniform(0, img_h - box_h))
        left = int(random.uniform(0, img_w - box_w))
        bottom, right = top + box_h, left + box_w
        im[top:bottom, left:right] = im[sy1:sy2, sx1:sx2]  # im[ymin:ymax, xmin:xmax]
        new_row = [[labels[idx, 0], left, top, right, bottom]]
        labels = np.concatenate((labels, new_row), axis=0)
    return im, labels
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize and pad an image to ``new_shape`` while meeting stride-multiple constraints.

    Args:
        im: image array, read as (height, width, ...).
        new_shape: target (height, width), or a single int for a square target.
        color: border fill value.
        auto: pad only up to the next multiple of ``stride`` (minimum rectangle).
        scaleFill: stretch to ``new_shape`` with no padding (only consulted when ``auto`` is false).
        scaleup: when false, the image is only ever scaled down.
        stride: stride used by ``auto``.

    Returns:
        (padded image, (width ratio, height ratio), (dw, dh)) where dw/dh are the
        per-side paddings.
    """
    src_h, src_w = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Scale ratio (new / old)
    r = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(src_w * r)), int(round(src_h * r))
    dw = dst_w - new_unpad[0]  # wh padding
    dh = dst_h - new_unpad[1]
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (dst_w, dst_h)
        ratio = dst_w / src_w, dst_h / src_h  # width, height ratios

    # divide padding into 2 sides
    dw /= 2
    dh /= 2

    if (src_w, src_h) != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -/+0.1 offsets send an odd padding pixel to the bottom/right side
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)
def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
                       border=(0, 0)):
    """Apply a random perspective/affine warp to an image and its labels.

    Comparable to torchvision.transforms.RandomAffine(degrees=(-10, 10),
    translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)).

    Args:
        im: image array, shape read as (h, w, c).
        targets: label rows read as [cls, x1, y1, x2, y2].
        segments: optional per-label polygons; used instead of boxes when any is non-empty.
        degrees, translate, scale, shear, perspective: ranges of the random transform.
        border: (y, x) border added on each side of the output canvas.

    Returns:
        (warped image, filtered and transformed targets).
    """
    out_h = im.shape[0] + border[0] * 2  # shape(h,w,c)
    out_w = im.shape[1] + border[1] * 2

    # Center
    center = np.eye(3)
    center[0, 2] = -im.shape[1] / 2  # x translation (pixels)
    center[1, 2] = -im.shape[0] / 2  # y translation (pixels)

    # Perspective
    persp = np.eye(3)
    persp[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    persp[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    rot = np.eye(3)
    angle = random.uniform(-degrees, degrees)
    zoom = random.uniform(1 - scale, 1 + scale)
    rot[:2] = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=zoom)

    # Shear
    shear_mat = np.eye(3)
    shear_mat[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    shear_mat[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    shift = np.eye(3)
    shift[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * out_w  # x translation (pixels)
    shift[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * out_h  # y translation (pixels)

    # Combined matrix; order of operations (right to left) is IMPORTANT
    M = shift @ shear_mat @ rot @ persp @ center
    image_changed = (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any()
    if image_changed:
        if perspective:
            im = cv2.warpPerspective(im, M, dsize=(out_w, out_h), borderValue=(114, 114, 114))
        else:  # affine
            im = cv2.warpAffine(im, M[:2], dsize=(out_w, out_h), borderValue=(114, 114, 114))

    n = len(targets)
    if not n:
        return im, targets

    def project(points):
        # Apply M to homogeneous points; perspective rescale or plain affine
        points = points @ M.T
        return points[:, :2] / points[:, 2:3] if perspective else points[:, :2]

    # Transform label coordinates
    use_segments = any(x.any() for x in segments)
    new = np.zeros((n, 4))
    if use_segments:  # warp segments
        segments = resample_segments(segments)  # upsample
        for idx, segment in enumerate(segments):
            pts = np.ones((len(segment), 3))
            pts[:, :2] = segment
            new[idx] = segment2box(project(pts), out_w, out_h)  # clip
    else:  # warp boxes
        corners = np.ones((n * 4, 3))
        corners[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        warped = project(corners).reshape(n, 8)

        # create new boxes
        xs = warped[:, [0, 2, 4, 6]]
        ys = warped[:, [1, 3, 5, 7]]
        new = np.concatenate((xs.min(1), ys.min(1), xs.max(1), ys.max(1))).reshape(4, n).T

        # clip
        new[:, [0, 2]] = new[:, [0, 2]].clip(0, out_w)
        new[:, [1, 3]] = new[:, [1, 3]].clip(0, out_h)

    # filter candidates
    area_thr = 0.01 if use_segments else 0.10
    keep = box_candidates(box1=targets[:, 1:5].T * zoom, box2=new.T, area_thr=area_thr)
    targets = targets[keep]
    targets[:, 1:5] = new[keep]
    return im, targets
def copy_paste(im, labels, segments, p=0.5):
    """Copy-Paste augmentation https://arxiv.org/abs/2012.07177.

    A random subset (fraction ``p``) of the segments is mirrored left-right and
    pasted into the image when the mirrored box overlaps existing labels by
    less than 30% (intersection over area). ``labels`` is an nx5 array of
    (cls, xyxy). ``im`` and ``segments`` are modified in place.

    Returns:
        (im, labels, segments)
    """
    n = len(segments)
    if not (p and n):
        return im, labels, segments

    _, w, _ = im.shape  # height, width, channels
    mask = np.zeros(im.shape, np.uint8)
    for j in random.sample(range(n), k=round(p * n)):
        label, seg = labels[j], segments[j]
        box = w - label[3], label[2], w - label[1], label[4]  # box mirrored about the vertical axis
        ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
        if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
            labels = np.concatenate((labels, [[label[0], *box]]), 0)
            segments.append(np.concatenate((w - seg[:, 0:1], seg[:, 1:2]), 1))
            cv2.drawContours(mask, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)

    pasted = cv2.bitwise_and(src1=im, src2=mask)
    pasted = cv2.flip(pasted, 1)  # augment segments (flip left-right)
    replace = pasted > 0  # pixels to replace
    im[replace] = pasted[replace]
    return im, labels, segments
def cutout(im, labels, p=0.5):
    """Cutout augmentation https://arxiv.org/abs/1708.04552.

    With probability ``p``, paints 31 random rectangles of decreasing size
    (fractions 0.5 ... 0.03125 of the image) with random colours, in place.
    Labels that become more than 60% covered by one of the larger masks are
    dropped.

    Args:
        im: image array, shape read as (h, w, ...); modified in place.
        labels: label rows with the box in columns 1:5.
        p: probability of applying the augmentation.

    Returns:
        The labels that remain unobscured (the input labels when not applied).
    """
    if random.random() < p:
        h, w = im.shape[:2]
        scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
        for s in scales:
            # create random masks; clamp the upper bound to 1 so that tiny images
            # (int(h * s) == 0) do not make randint raise on an empty range
            mask_h = random.randint(1, max(1, int(h * s)))
            mask_w = random.randint(1, max(1, int(w * s)))

            # box
            xmin = max(0, random.randint(0, w) - mask_w // 2)
            ymin = max(0, random.randint(0, h) - mask_h // 2)
            xmax = min(w, xmin + mask_w)
            ymax = min(h, ymin + mask_h)

            # apply random color mask
            im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

            # return unobscured labels (the smallest masks are not checked)
            if len(labels) and s > 0.03:
                box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
                ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
                labels = labels[ioa < 0.60]  # remove >60% obscured labels
    return labels
def mixup(im, labels, im2, labels2):
    """MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf.

    Blends two images with a ratio drawn from Beta(32, 32) and concatenates
    their labels. Returns ``(blended uint8 image, combined labels)``.
    """
    ratio = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
    blended = im * ratio + im2 * (1 - ratio)
    merged_labels = np.concatenate((labels, labels2), 0)
    return blended.astype(np.uint8), merged_labels
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
    """Select which augmented boxes are still usable.

    Args:
        box1: boxes before augmentation, shape (4, n) as x1, y1, x2, y2.
        box2: boxes after augmentation, same layout.
        wh_thr: minimum width and height (pixels) of the augmented box.
        ar_thr: maximum aspect ratio of the augmented box.
        area_thr: minimum ratio of augmented area to original area.
        eps: small constant guarding the divisions.

    Returns:
        Boolean array of length n, True for boxes passing every test.
    """
    w_before = box1[2] - box1[0]
    h_before = box1[3] - box1[1]
    w_after = box2[2] - box2[0]
    h_after = box2[3] - box2[1]

    aspect = np.maximum(w_after / (h_after + eps), h_after / (w_after + eps))  # aspect ratio
    big_enough = (w_after > wh_thr) & (h_after > wh_thr)
    area_kept = w_after * h_after / (w_before * h_before + eps) > area_thr
    return big_enough & area_kept & (aspect < ar_thr)  # candidates
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Auto-anchor utils
"""
import random
import numpy as np
import torch
import yaml
from tqdm import tqdm
from yolov5processor.utils.general import colorstr
def check_anchor_order(m):
    """Make the anchor order of Detect() module ``m`` agree with its stride order.

    Compares the sign of (last anchor area - first anchor area) with the sign
    of (last stride - first stride); when they differ, ``m.anchors`` is
    reversed in place along its first dimension.
    """
    areas = m.anchors.prod(-1).view(-1)  # anchor area
    area_delta = areas[-1] - areas[0]
    stride_delta = m.stride[-1] - m.stride[0]
    if area_delta.sign() == stride_delta.sign():
        return  # already in the same order
    print('Reversing anchor order')
    m.anchors[:] = m.anchors.flip(0)
def check_anchors(dataset, model, thr=4.0, imgsz=640):
    """Check how well the model's anchors fit the dataset and recompute them if necessary.

    The best possible recall (BPR) of the current anchors is measured against
    the label widths/heights. When it falls below 0.98, new anchors are
    evolved with ``kmean_anchors`` and written into the model's Detect() layer
    if they give a higher BPR.

    Args:
        dataset: object providing ``shapes`` and ``labels`` (label columns 3:5 are read as w, h).
        model: model whose last layer (``model.model[-1]`` or ``model.module.model[-1]``) is Detect().
        thr: anchor/label ratio threshold.
        imgsz: image size the labels are scaled to.
    """
    prefix = colorstr('autoanchor: ')
    print(f'\n{prefix}Analyzing anchors... ', end='')
    m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1]  # Detect()
    shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1))  # augment scale
    wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float()  # wh

    def metric(k):  # compute metric
        r = wh[:, None] / k[None]
        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
        best = x.max(1)[0]  # best_x
        aat = (x > 1. / thr).float().sum(1).mean()  # anchors above threshold
        bpr = (best > 1. / thr).float().mean()  # best possible recall
        return bpr, aat

    anchors = m.anchors.clone() * m.stride.to(m.anchors.device).view(-1, 1, 1)  # current anchors
    bpr, aat = metric(anchors.cpu().view(-1, 2))
    print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='')
    if bpr < 0.98:  # threshold to recompute
        print('. Attempting to improve anchors, please wait...')
        na = m.anchors.numel() // 2  # number of anchors
        try:
            anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
        except Exception as e:
            # Recomputation failed: report it and keep the original anchors. Previously the
            # code went on to evaluate metric() on the stale 3-D anchors tensor.
            print(f'{prefix}ERROR: {e}')
        else:
            new_bpr = metric(anchors)[0]
            if new_bpr > bpr:  # replace anchors
                anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors)
                m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1)  # loss
                check_anchor_order(m)
                print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.')
            else:
                print(f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.')
    print('')  # newline
def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
    """ Creates kmeans-evolved anchors from training dataset

        Arguments:
            dataset: path to data.yaml, or a loaded dataset
            n: number of anchors
            img_size: image size used for training
            thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
            gen: generations to evolve anchors using genetic algorithm
            verbose: print all results

        Return:
            k: kmeans evolved anchors, sorted from small to large area

        Usage:
            _ = kmean_anchors()
    """
    from scipy.cluster.vq import kmeans

    thr = 1. / thr  # from here on thr is the minimum acceptable wh ratio
    prefix = colorstr('autoanchor: ')

    def metric(k, wh):  # compute metrics
        r = wh[:, None] / k[None]
        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
        return x, x.max(1)[0]  # x, best_x

    def anchor_fitness(k):  # mutation fitness
        _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
        return (best * (best > thr).float()).mean()  # fitness

    def print_results(k):
        k = k[np.argsort(k.prod(1))]  # sort small to large
        x, best = metric(k, wh0)
        bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n  # best possible recall, anch > thr
        print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr')
        print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, '
              f'past_thr={x[x > thr].mean():.3f}-mean: ', end='')
        for i, x in enumerate(k):
            print('%i,%i' % (round(x[0]), round(x[1])), end=',  ' if i < len(k) - 1 else '\n')  # use in *.cfg
        return k

    if isinstance(dataset, str):  # *.yaml file
        with open(dataset, errors='ignore') as f:
            data_dict = yaml.safe_load(f)  # model dict
        # Import through the package path used by the rest of this code base
        # (the bare ``utils.datasets`` path is not how sibling modules are imported here).
        from yolov5processor.utils.datasets import LoadImagesAndLabels
        dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)

    # Get label wh
    shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])  # wh

    # Filter
    i = (wh0 < 3.0).any(1).sum()
    if i:
        print(f'{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.')
    wh = wh0[(wh0 >= 2.0).any(1)]  # keep labels with at least one side >= 2 pixels

    # Kmeans calculation
    print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...')
    s = wh.std(0)  # sigmas for whitening
    k, dist = kmeans(wh / s, n, iter=30)  # points, mean distance
    assert len(k) == n, f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}'
    k *= s  # undo the whitening
    wh = torch.tensor(wh, dtype=torch.float32)  # filtered
    wh0 = torch.tensor(wh0, dtype=torch.float32)  # unfiltered
    k = print_results(k)

    # Evolve
    npr = np.random
    f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1  # fitness, anchor array shape, mutation prob, sigma
    pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:')  # progress bar
    for _ in pbar:
        v = np.ones(sh)
        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
        kg = (k.copy() * v).clip(min=2.0)
        fg = anchor_fitness(kg)
        if fg > f:  # keep the mutation only when it improves fitness
            f, k = fg, kg.copy()
            pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'
            if verbose:
                print_results(k)

    return print_results(k)
# AWS EC2 instance startup 'MIME' script https://aws.amazon.com/premiumsupport/knowledge-center/execute-user-data-ec2/
# This script will run on every instance restart, not only on first start
# --- DO NOT COPY ABOVE COMMENTS WHEN PASTING INTO USERDATA ---
Content-Type: multipart/mixed; boundary="//"
MIME-Version: 1.0
--//
Content-Type: text/cloud-config; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="cloud-config.txt"
#cloud-config
cloud_final_modules:
- [scripts-user, always]
--//
Content-Type: text/x-shellscript; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="userdata.txt"
#!/bin/bash
# --- paste contents of userdata.sh here ---
--//
# Resume all interrupted trainings in yolov5/ dir including DDP trainings
# Usage: $ python utils/aws/resume.py
import os
import sys
from pathlib import Path
import torch
import yaml
FILE = Path(__file__).resolve()
ROOT = FILE.parents[2]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH

port = 0  # --master_port, incremented for every DDP run that is launched
path = Path('').resolve()
for last in path.rglob('*/**/last.pt'):
    # Only the 'optimizer' entry is inspected, so load onto the CPU instead of
    # restoring tensors to whatever device the checkpoint was saved from.
    ckpt = torch.load(last, map_location='cpu')
    if ckpt['optimizer'] is None:  # skip checkpoints that carry no optimizer state
        continue

    # Load opt.yaml
    with open(last.parent.parent / 'opt.yaml', errors='ignore') as f:
        opt = yaml.safe_load(f)

    # Get device count
    d = opt['device'].split(',')  # devices
    nd = len(d)  # number of devices
    # NOTE(review): ''.split(',') yields [''], so nd is never 0 and the `nd == 0`
    # clause below looks unreachable; confirm how an empty device string should be handled.
    ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1)  # distributed data parallel

    if ddp:  # multi-GPU
        port += 1
        cmd = f'python -m torch.distributed.run --nproc_per_node {nd} --master_port {port} train.py --resume {last}'
    else:  # single-GPU
        cmd = f'python train.py --resume {last}'

    cmd += ' > /dev/null 2>&1 &'  # discard output and run the command in the background
    print(cmd)
    os.system(cmd)
#!/bin/bash
# AWS EC2 instance startup script https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html
# This script will run only once on first instance start (for a re-start script see mime.sh)
# /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir
# Use >300 GB SSD
# Relative path: the script relies on being started from the filesystem root
cd home/ubuntu
# The presence of the yolov5 checkout distinguishes first boot from a restart
if [ ! -d yolov5 ]; then
echo "Running first-time script." # install dependencies, download COCO, pull Docker
git clone https://github.com/ultralytics/yolov5 -b master && sudo chmod -R 777 yolov5
cd yolov5
# The three setup tasks below run concurrently as background jobs
bash data/scripts/get_coco.sh && echo "COCO done." &
sudo docker pull ultralytics/yolov5:latest && echo "Docker done." &
python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." &
wait && echo "All tasks done." # finish background tasks
else
echo "Running re-start script." # resume interrupted runs
i=0
list=$(sudo docker ps -qa) # container list i.e. $'one\ntwo\nthree\nfour'
# Restart every listed container and launch the resume script inside it
while IFS= read -r id; do
((i++))
echo "restarting container $i: $id"
sudo docker start $id
# sudo docker exec -it $id python train.py --resume # single-GPU
sudo docker exec -d $id python utils/aws/resume.py # multi-scenario
done <<<"$list"
fi
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Callback utils
"""
class Callbacks:
    """
    Handles all registered callbacks for YOLOv5 Hooks
    """

    # Names of the available callback hooks
    _HOOK_NAMES = (
        'on_pretrain_routine_start',
        'on_pretrain_routine_end',

        'on_train_start',
        'on_train_epoch_start',
        'on_train_batch_start',
        'optimizer_step',
        'on_before_zero_grad',
        'on_train_batch_end',
        'on_train_epoch_end',

        'on_val_start',
        'on_val_batch_start',
        'on_val_image_end',
        'on_val_batch_end',
        'on_val_end',

        'on_fit_epoch_end',  # fit = train + val
        'on_model_save',
        'on_train_end',

        'teardown',
    )

    def __init__(self):
        # Build the registry per instance. As a class-level dict it was shared,
        # so an action registered on one Callbacks object fired on every other one.
        self._callbacks = {hook: [] for hook in self._HOOK_NAMES}

    def register_action(self, hook, name='', callback=None):
        """
        Register a new action to a callback hook

        Args:
            hook        The callback hook name to register the action to
            name        The name of the action for later reference
            callback    The callback to fire
        """
        assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
        assert callable(callback), f"callback '{callback}' is not callable"
        self._callbacks[hook].append({'name': name, 'callback': callback})

    def get_registered_actions(self, hook=None):
        """
        Returns all the registered actions by callback hook

        Args:
            hook The name of the hook to check, defaults to all
        """
        if hook:
            return self._callbacks[hook]
        return self._callbacks

    def run(self, hook, *args, **kwargs):
        """
        Loop through the registered actions and fire all callbacks

        Args:
            hook The name of the hook whose actions are fired
            args Arguments to receive from YOLOv5
            kwargs Keyword Arguments to receive from YOLOv5
        """
        assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"

        for action in self._callbacks[hook]:
            action['callback'](*args, **kwargs)
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Dataloaders and dataset utils
"""
import glob
import hashlib
import json
import logging
import os
import random
import shutil
import time
from itertools import repeat
from multiprocessing.pool import ThreadPool, Pool
from pathlib import Path
from threading import Thread
from zipfile import ZipFile
import cv2
import numpy as np
import torch
import torch.nn.functional as F
import yaml
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm
from yolov5processor.utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
from yolov5processor.utils.general import check_dataset, check_requirements, check_yaml, clean_str, segments2boxes, \
xywh2xyxy, xywhn2xyxy, xyxy2xywhn, xyn2xy
from yolov5processor.utils.torch_utils import torch_distributed_zero_first
# Parameters
HELP_URL = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
IMG_FORMATS = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']  # acceptable image suffixes
VID_FORMATS = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv']  # acceptable video suffixes
# os.cpu_count() can return None when the count is undetermined; fall back to 1 in that case
NUM_THREADS = min(8, os.cpu_count() or 1)  # number of multiprocessing threads
# Get orientation exif tag
# Leave the numeric EXIF tag id named 'Orientation' in the module-level name `orientation`
for orientation in ExifTags.TAGS:
    if ExifTags.TAGS[orientation] == 'Orientation':
        break
def get_hash(paths):
    """Return one MD5 hex digest for a list of paths (files or dirs).

    The digest covers the summed ``os.path.getsize`` of the paths that exist,
    followed by the concatenation of all path strings.
    """
    total_size = 0
    for p in paths:
        if os.path.exists(p):
            total_size += os.path.getsize(p)  # sizes
    digest = hashlib.md5(str(total_size).encode())  # hash sizes
    digest.update(''.join(paths).encode())  # hash paths
    return digest.hexdigest()
def exif_size(img):
    """Return the EXIF-corrected size of a PIL image as (width, height).

    Width and height are swapped when the EXIF orientation value is 6 or 8.
    Any failure while reading the EXIF data (no EXIF, tag missing, ...) leaves
    the plain ``img.size`` untouched.
    """
    s = img.size  # (width, height)
    try:
        rotation = dict(img._getexif().items())[orientation]
        if rotation in (6, 8):  # rotation 270 or 90
            s = (s[1], s[0])
    except Exception:
        # Best effort only. Unlike the previous bare `except:`, this no longer
        # swallows KeyboardInterrupt / SystemExit.
        pass
    return s
def exif_transpose(image):
    """
    Transpose a PIL image according to its EXIF Orientation tag, if it has one.
    From https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py

    After a transpose the Orientation tag is removed and the remaining EXIF
    data is stored in ``image.info["exif"]``.

    :param image: The image to transpose.
    :return: An image.
    """
    exif = image.getexif()
    orientation = exif.get(0x0112, 1)  # default 1
    if not orientation > 1:
        return image  # orientation 1: nothing to do

    transpose_methods = {
        2: Image.FLIP_LEFT_RIGHT,
        3: Image.ROTATE_180,
        4: Image.FLIP_TOP_BOTTOM,
        5: Image.TRANSPOSE,
        6: Image.ROTATE_270,
        7: Image.TRANSVERSE,
        8: Image.ROTATE_90,
    }
    method = transpose_methods.get(orientation)
    if method is None:
        return image  # unknown orientation value

    image = image.transpose(method)
    del exif[0x0112]
    image.info["exif"] = exif.tobytes()
    return image
def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0,
                      rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix=''):
    """Build a LoadImagesAndLabels dataset and a dataloader over it.

    Most arguments are forwarded unchanged to ``LoadImagesAndLabels``. The ones
    handled here are:

        batch_size: capped at the dataset length.
        rank: when not -1, a DistributedSampler is used.
        workers: upper bound on the number of loader workers.
        image_weights: selects the plain torch DataLoader instead of InfiniteDataLoader.
        quad: selects ``collate_fn4`` instead of ``collate_fn``.

    Returns:
        (dataloader, dataset)
    """
    # Make sure only the first process in DDP process the dataset first, and the following others can use the cache
    with torch_distributed_zero_first(rank):
        dataset = LoadImagesAndLabels(path, imgsz, batch_size,
                                      augment=augment,  # augment images
                                      hyp=hyp,  # augmentation hyperparameters
                                      rect=rect,  # rectangular training
                                      cache_images=cache,
                                      single_cls=single_cls,
                                      stride=int(stride),
                                      pad=pad,
                                      image_weights=image_weights,
                                      prefix=prefix)

    batch_size = min(batch_size, len(dataset))
    # os.cpu_count() can return None; treat that as a single CPU so min() does not raise
    cpu_total = os.cpu_count() or 1
    nw = min([cpu_total, batch_size if batch_size > 1 else 0, workers])  # number of workers
    sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
    loader = torch.utils.data.DataLoader if image_weights else InfiniteDataLoader
    # Use torch.utils.data.DataLoader() if dataset.properties will update during training else InfiniteDataLoader()
    dataloader = loader(dataset,
                        batch_size=batch_size,
                        num_workers=nw,
                        sampler=sampler,
                        pin_memory=True,
                        collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn)
    return dataloader, dataset
class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
    """ Dataloader that reuses workers

    Uses same syntax as vanilla DataLoader. A single underlying iterator is
    created once and shared by every pass over the loader.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Wrap the batch sampler so it never runs dry. object.__setattr__ bypasses
        # DataLoader's own attribute setter (presumably it rejects changing
        # batch_sampler after construction - confirm against the torch version in use).
        endless = _RepeatSampler(self.batch_sampler)
        object.__setattr__(self, 'batch_sampler', endless)
        self.iterator = super().__iter__()

    def __len__(self):
        # Length of the wrapped (original) batch sampler
        return len(self.batch_sampler.sampler)

    def __iter__(self):
        # Pull exactly one pass worth of batches from the shared iterator
        for _ in range(len(self)):
            yield next(self.iterator)
class _RepeatSampler(object):
""" Sampler that repeats forever
Args:
sampler (Sampler)
"""
def __init__(self, sampler):
self.sampler = sampler
def __iter__(self):
while True:
yield from iter(self.sampler)
class LoadImages:
    """YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`

    Iterating yields ``(path, img, img0, cap)``: the file path, the letterboxed
    CHW RGB array, the original BGR frame and the current video capture object
    (None when the source list has no videos).
    """

    def __init__(self, path, img_size=640, stride=32, auto=True):
        p = str(Path(path).resolve())  # os-agnostic absolute path
        if '*' in p:
            files = sorted(glob.glob(p, recursive=True))  # glob
        elif os.path.isdir(p):
            files = sorted(glob.glob(os.path.join(p, '*.*')))  # dir
        elif os.path.isfile(p):
            files = [p]  # files
        else:
            raise Exception(f'ERROR: {p} does not exist')

        def suffix(name):
            # text after the last dot, lower-cased
            return name.split('.')[-1].lower()

        images = [f for f in files if suffix(f) in IMG_FORMATS]
        videos = [f for f in files if suffix(f) in VID_FORMATS]
        ni, nv = len(images), len(videos)

        self.img_size = img_size
        self.stride = stride
        self.files = images + videos  # images first, videos last
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv
        self.mode = 'image'
        self.auto = auto
        if any(videos):
            self.new_video(videos[0])  # new video
        else:
            self.cap = None
        assert self.nf > 0, f'No images or videos found in {p}. ' \
                            f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            if not ret_val:
                # Current video is exhausted: move on to the next file
                self.count += 1
                self.cap.release()
                if self.count == self.nf:  # last video
                    raise StopIteration
                path = self.files[self.count]
                self.new_video(path)
                ret_val, img0 = self.cap.read()

            self.frame += 1
            print(f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ', end='')
        else:
            # Read image
            self.count += 1
            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, 'Image Not Found ' + path
            print(f'image {self.count}/{self.nf} {path}: ', end='')

        # Padded resize
        padded = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0]

        # Convert: HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(padded.transpose((2, 0, 1))[::-1])

        return path, img, img0, self.cap

    def new_video(self, path):
        """Open ``path`` as the current video and reset the frame counter."""
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nf  # number of files
class LoadWebcam:  # for inference
    """YOLOv5 local webcam dataloader, i.e. `python detect.py --source 0`

    Iterating yields ``(img_path, img, img0, None)``: a fixed placeholder path,
    the letterboxed CHW RGB array and the left-right flipped BGR frame.
    Iteration ends when 'q' is pressed in an OpenCV window.
    """

    def __init__(self, pipe='0', img_size=640, stride=32):
        self.img_size = img_size
        self.stride = stride
        # A purely decimal string is a device index; anything else is passed to
        # OpenCV as given. int() replaces the previous eval(), which executed the
        # string as code and failed on inputs such as '007'.
        self.pipe = int(pipe) if pipe.isdecimal() else pipe
        self.cap = cv2.VideoCapture(self.pipe)  # video capture object
        self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3)  # set buffer size

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        if cv2.waitKey(1) == ord('q'):  # q to quit
            self.cap.release()
            cv2.destroyAllWindows()
            raise StopIteration

        # Read frame; check success before touching the frame so a failed read
        # surfaces as the camera error below rather than an error inside cv2.flip
        ret_val, img0 = self.cap.read()
        assert ret_val, f'Camera Error {self.pipe}'
        img0 = cv2.flip(img0, 1)  # flip left-right

        # Print
        img_path = 'webcam.jpg'
        print(f'webcam {self.count}: ', end='')

        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride)[0]

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return img_path, img, img0, None

    def __len__(self):
        return 0
class LoadStreams:
    """YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4'` (RTSP, RTMP, HTTP streams).

    One daemon thread per source keeps `self.imgs[i]` updated with the latest frame;
    iterating yields the letterboxed, stacked batch built from those frames.
    """

    def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True):
        """Open every source and start its reader thread.

        Args:
            sources: Path to a text file with one source per line, or a single source string.
            img_size: Target size passed to `letterbox`.
            stride: Stride passed to `letterbox`.
            auto: `auto` flag forwarded to `letterbox`.

        Raises:
            AssertionError: if a source cannot be opened.
        """
        self.mode = 'stream'
        self.img_size = img_size
        self.stride = stride

        if os.path.isfile(sources):
            with open(sources, 'r') as f:
                sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
        else:
            sources = [sources]

        n = len(sources)
        self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
        self.sources = [clean_str(x) for x in sources]  # clean source names for later
        self.auto = auto
        for i, s in enumerate(sources):  # index, source
            # Start thread to read frames from video stream
            print(f'{i + 1}/{n}: {s}... ', end='')
            if 'youtube.com/' in s or 'youtu.be/' in s:  # if source is YouTube video
                check_requirements(('pafy', 'youtube_dl'))
                import pafy
                s = pafy.new(s).getbest(preftype="mp4").url  # YouTube URL
            # int() rather than eval(): sources come from the CLI or a text file and must not be executed
            s = int(s) if s.isnumeric() else s  # i.e. s = '0' local webcam
            cap = cv2.VideoCapture(s)
            assert cap.isOpened(), f'Failed to open {s}'
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            self.fps[i] = max(cap.get(cv2.CAP_PROP_FPS) % 100, 0) or 30.0  # 30 FPS fallback
            self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf')  # infinite stream fallback

            _, self.imgs[i] = cap.read()  # guarantee first frame
            self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True)
            print(f" success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")
            self.threads[i].start()
        print('')  # newline

        # check for common shapes
        s = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0].shape for x in self.imgs])
        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
        if not self.rect:
            print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')

    def update(self, i, cap, stream):
        """Read stream `i` frames in a daemon thread, storing the newest frame in `self.imgs[i]`."""
        n, f, read = 0, self.frames[i], 1  # frame number, frame array, inference every 'read' frame
        while cap.isOpened() and n < f:
            n += 1
            cap.grab()
            if n % read == 0:
                success, im = cap.retrieve()
                if success:
                    self.imgs[i] = im
                else:
                    print('WARNING: Video stream unresponsive, please check your IP camera connection.')
                    # Publish a NEW blank frame instead of zeroing in place: __next__ only shallow-copies
                    # the list, so an in-place `*= 0` would wipe a frame the consumer is still using.
                    self.imgs[i] = np.zeros_like(self.imgs[i])
                    cap.open(stream)  # re-open stream if signal was lost
            time.sleep(1 / self.fps[i])  # wait time

    def __iter__(self):
        """Reset the batch counter and return self."""
        self.count = -1
        return self

    def __next__(self):
        """Return `(sources, img, img0, None)` for the latest frame of every stream.

        Raises:
            StopIteration: when any reader thread has died or the user presses 'q'.
        """
        self.count += 1
        if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'):  # q to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Letterbox
        img0 = self.imgs.copy()
        img = [letterbox(x, self.img_size, stride=self.stride, auto=self.rect and self.auto)[0] for x in img0]

        # Stack
        img = np.stack(img, 0)

        # Convert
        img = img[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW
        img = np.ascontiguousarray(img)

        return self.sources, img, img0, None

    def __len__(self):
        """Return the number of sources."""
        return len(self.sources)  # 1E12 frames = 32 streams at 30 FPS for 30 years
def img2label_paths(img_paths):
    """Map image paths to label paths.

    For each path the LAST `/images/` directory component is swapped for `/labels/`
    and the text after the final '.' is replaced by `txt`.
    """
    images_part = f'{os.sep}images{os.sep}'  # /images/ substring
    labels_part = f'{os.sep}labels{os.sep}'  # /labels/ substring
    label_paths = []
    for img_path in img_paths:
        pieces = img_path.rsplit(images_part, 1)  # split around the last /images/ only
        swapped = labels_part.join(pieces)
        without_ext = swapped.rsplit('.', 1)[0]
        label_paths.append(without_ext + '.txt')
    return label_paths
class LoadImagesAndLabels(Dataset):
    """YOLOv5 train_loader/val_loader, loads images and labels for training and validation."""

    cache_version = 0.5  # dataset labels *.cache version

    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
        """Collect image files, load (or build) the label cache and optionally cache images.

        Args:
            path: Directory, text file of image paths, or a list of either.
            img_size: Target long-side image size.
            batch_size: Batch size, used to assign images to batches.
            augment: Enable augmentation (and mosaic, unless `rect`).
            hyp: Hyperparameter dict read by `__getitem__` and the mosaic loaders.
            rect: Rectangular batches sorted by aspect ratio (disabled when `image_weights`).
            image_weights: Stored on the instance; forces `rect` off.
            cache_images: Falsy for no caching, 'disk' for *.npy files, any other truthy value for RAM.
            single_cls: Merge all classes into class 0.
            stride: Stride used to round rectangular batch shapes.
            pad: Padding term added when computing rectangular batch shapes.
            prefix: String prepended to log and error messages.

        Raises:
            Exception: if the data cannot be loaded from `path`.
            AssertionError: if augmenting and no labels were found.
        """
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
        self.mosaic_border = [-img_size // 2, -img_size // 2]
        self.stride = stride
        self.path = path
        self.albumentations = Albumentations() if augment else None

        try:
            f = []  # image files
            for p in path if isinstance(path, list) else [path]:
                p = Path(p)  # os-agnostic
                if p.is_dir():  # dir
                    f += glob.glob(str(p / '**' / '*.*'), recursive=True)
                elif p.is_file():  # file
                    with open(p, 'r') as t:
                        t = t.read().strip().splitlines()
                        parent = str(p.parent) + os.sep
                        f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
                else:
                    raise Exception(f'{prefix}{p} does not exist')
            self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS])
            assert self.img_files, f'{prefix}No images found'
        except Exception as e:
            raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}') from e

        # Check cache
        self.label_files = img2label_paths(self.img_files)  # labels
        # `p` is the last entry visited by the loop above
        cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
        try:
            cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
            assert cache['version'] == self.cache_version  # same version
            assert cache['hash'] == get_hash(self.label_files + self.img_files)  # same hash
        except Exception:  # missing, unreadable or stale cache -> rebuild (KeyboardInterrupt still propagates)
            cache, exists = self.cache_labels(cache_path, prefix), False  # cache

        # Display cache
        nf, nm, ne, nc, n = cache.pop('results')  # found, missing, empty, corrupted, total
        if exists:
            d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
            tqdm(None, desc=prefix + d, total=n, initial=n)  # display cache results
            if cache['msgs']:
                logging.info('\n'.join(cache['msgs']))  # display warnings
        assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}'

        # Read cache
        for k in ('hash', 'version', 'msgs'):  # remove bookkeeping items, leaving only per-image entries
            cache.pop(k)
        labels, shapes, segments = zip(*cache.values())
        self.labels = list(labels)
        self.segments = list(segments)  # list (not tuple) so per-image entries can be replaced below
        self.shapes = np.array(shapes, dtype=np.float64)
        self.img_files = list(cache.keys())  # update
        self.label_files = img2label_paths(cache.keys())  # update
        n = len(shapes)  # number of images
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index (np.int was removed from NumPy)
        nb = bi[-1] + 1  # number of batches
        self.batch = bi  # batch index of image
        self.n = n
        self.indices = range(n)

        # Update labels
        include_class = []  # filter labels to include only these classes (optional)
        include_class_array = np.array(include_class).reshape(1, -1)
        for i, (label, segment) in enumerate(zip(self.labels, self.segments)):
            if include_class:
                j = (label[:, 0:1] == include_class_array).any(1)
                self.labels[i] = label[j]
                if segment:
                    # segment is a plain list of point arrays, so filter it element-wise with the mask
                    self.segments[i] = [seg for seg, keep in zip(segment, j) if keep]
            if single_cls:  # single-class training, merge all classes into 0
                # Only labels carry a class column; segments are (n, 2) point arrays in a list
                self.labels[i][:, 0] = 0

        # Rectangular Training
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # aspect ratio
            irect = ar.argsort()
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.labels = [self.labels[i] for i in irect]
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # Set training image shapes
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                if maxi < 1:
                    shapes[i] = [maxi, 1]
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]

            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride

        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        self.imgs, self.img_npy = [None] * n, [None] * n
        if cache_images:
            if cache_images == 'disk':
                self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy')
                self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files]
                self.im_cache_dir.mkdir(parents=True, exist_ok=True)
            gb = 0  # Gigabytes of cached images
            self.img_hw0, self.img_hw = [None] * n, [None] * n
            results = ThreadPool(NUM_THREADS).imap(lambda x: load_image(*x), zip(repeat(self), range(n)))
            pbar = tqdm(enumerate(results), total=n)
            for i, x in pbar:
                if cache_images == 'disk':
                    if not self.img_npy[i].exists():
                        np.save(self.img_npy[i].as_posix(), x[0])
                    gb += self.img_npy[i].stat().st_size
                else:
                    self.imgs[i], self.img_hw0[i], self.img_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
                    gb += self.imgs[i].nbytes
                pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})'
            pbar.close()

    def cache_labels(self, path=Path('./labels.cache'), prefix=''):
        """Verify every image/label pair, then build, save and return the label cache dict.

        The dict maps image file -> [labels, shape, segments] plus the bookkeeping keys
        'hash', 'results', 'msgs' and 'version'. A failure to write the cache is logged, not raised.
        """
        x = {}  # dict
        nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
        desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
        with Pool(NUM_THREADS) as pool:
            pbar = tqdm(pool.imap(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))),
                        desc=desc, total=len(self.img_files))
            for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                nm += nm_f
                nf += nf_f
                ne += ne_f
                nc += nc_f
                if im_file:
                    x[im_file] = [l, shape, segments]
                if msg:
                    msgs.append(msg)
                pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupted"

        pbar.close()
        if msgs:
            logging.info('\n'.join(msgs))
        if nf == 0:
            logging.info(f'{prefix}WARNING: No labels found in {path}. See {HELP_URL}')
        x['hash'] = get_hash(self.label_files + self.img_files)
        x['results'] = nf, nm, ne, nc, len(self.img_files)
        x['msgs'] = msgs  # warnings
        x['version'] = self.cache_version  # cache version
        try:
            np.save(path, x)  # save cache for next time
            path.with_suffix('.cache.npy').rename(path)  # remove .npy suffix
            logging.info(f'{prefix}New cache created: {path}')
        except Exception as e:
            logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}')  # path not writeable
        return x

    def __len__(self):
        """Return the number of images."""
        return len(self.img_files)

    def __getitem__(self, index):
        """Return `(image tensor, labels_out, image path, shapes)` for one sample.

        `labels_out` has 6 columns; column 0 is left at zero here and filled with the
        batch image index by the collate functions. `shapes` is None for mosaic samples.
        """
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None

            # MixUp augmentation
            if random.random() < hyp['mixup']:
                img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1)))

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            labels = self.labels[index].copy()
            if labels.size:  # normalized xywh to pixel xyxy format
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

            if self.augment:
                img, labels = random_perspective(img, labels,
                                                 degrees=hyp['degrees'],
                                                 translate=hyp['translate'],
                                                 scale=hyp['scale'],
                                                 shear=hyp['shear'],
                                                 perspective=hyp['perspective'])

        nl = len(labels)  # number of labels
        if nl:
            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)

        if self.augment:
            # Albumentations
            img, labels = self.albumentations(img, labels)
            nl = len(labels)  # update after albumentations

            # HSV color-space
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nl:
                    labels[:, 2] = 1 - labels[:, 2]

            # Flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nl:
                    labels[:, 1] = 1 - labels[:, 1]

        labels_out = torch.zeros((nl, 6))
        if nl:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

    @staticmethod
    def collate_fn(batch):
        """Stack images and concatenate labels, writing each sample's batch position into label column 0."""
        img, label, path, shapes = zip(*batch)  # transposed
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes

    @staticmethod
    def collate_fn4(batch):
        """Collate groups of 4 samples into one: randomly either a 2x upsample of the first or a 2x2 tile of all four."""
        img, label, path, shapes = zip(*batch)  # transposed
        n = len(shapes) // 4
        img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]

        ho = torch.tensor([[0., 0, 0, 1, 0, 0]])
        wo = torch.tensor([[0., 0, 1, 0, 0, 0]])
        s = torch.tensor([[1, 1, .5, .5, .5, .5]])  # scale
        for i in range(n):  # zidane torch.zeros(16,3,720,1280)  # BCHW
            i *= 4
            if random.random() < 0.5:
                im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear', align_corners=False)[
                    0].type(img[i].type())
                l = label[i]
            else:
                im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
                l = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
            img4.append(im)
            label4.append(l)

        for i, l in enumerate(label4):
            l[:, 0] = i  # add target image index for build_targets()

        return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4
# Ancillary functions --------------------------------------------------------------------------------------------------
def load_image(self, i):
    """Load image `i` of the dataset and return `(im, (h0, w0), (h, w))`.

    A RAM-cached image is returned as-is with its stored sizes. Otherwise the image is
    read from its *.npy file when one exists, else from the image file, and resized so
    that its longer side equals `self.img_size`.
    """
    if self.imgs[i] is not None:  # cached in RAM
        return self.imgs[i], self.img_hw0[i], self.img_hw[i]  # im, hw_original, hw_resized

    npy_file = self.img_npy[i]
    if npy_file and npy_file.exists():  # load npy
        im = np.load(npy_file)
    else:  # read image
        path = self.img_files[i]
        im = cv2.imread(path)  # BGR
        assert im is not None, 'Image Not Found ' + path

    h0, w0 = im.shape[:2]  # orig hw
    ratio = self.img_size / max(h0, w0)
    if ratio != 1:  # if sizes are not equal
        shrinking_without_aug = ratio < 1 and not self.augment
        method = cv2.INTER_AREA if shrinking_without_aug else cv2.INTER_LINEAR
        im = cv2.resize(im, (int(w0 * ratio), int(h0 * ratio)), interpolation=method)
    return im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
def load_mosaic(self, index):
    """YOLOv5 4-mosaic loader: tile image `index` and 3 randomly chosen images on a 2s x 2s canvas.

    Returns the augmented mosaic image and its labels (pixel xyxy) after `copy_paste`
    and `random_perspective`.
    """
    all_labels, all_segments = [], []
    s = self.img_size
    yc, xc = [int(random.uniform(-b, 2 * s + b)) for b in self.mosaic_border]  # mosaic center x, y
    picks = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
    random.shuffle(picks)
    for tile, idx in enumerate(picks):
        # Load image
        img, _, (h, w) = load_image(self, idx)

        # Destination (a) rectangle on the canvas and source (b) rectangle in the image
        if tile == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h
        elif tile == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif tile == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif tile == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw, padh = x1a - x1b, y1a - y1b  # offset of this tile's origin on the canvas

        # Labels
        labels = self.labels[idx].copy()
        segments = self.segments[idx].copy()
        if labels.size:
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
            segments = [xyn2xy(seg, w, h, padw, padh) for seg in segments]
        all_labels.append(labels)
        all_segments.extend(segments)

    # Concat/clip labels
    labels4 = np.concatenate(all_labels, 0)
    for coords in (labels4[:, 1:], *all_segments):
        np.clip(coords, 0, 2 * s, out=coords)  # clip when using random_perspective()

    # Augment
    img4, labels4, segments4 = copy_paste(img4, labels4, all_segments, p=self.hyp['copy_paste'])
    hyp = self.hyp
    img4, labels4 = random_perspective(img4, labels4, segments4,
                                       degrees=hyp['degrees'],
                                       translate=hyp['translate'],
                                       scale=hyp['scale'],
                                       shear=hyp['shear'],
                                       perspective=hyp['perspective'],
                                       border=self.mosaic_border)  # border to remove
    return img4, labels4
def load_mosaic9(self, index):
    """YOLOv5 9-mosaic loader: tile image `index` and 8 randomly chosen images on a 3s x 3s canvas.

    A random 2s x 2s window is cropped from the canvas, labels are shifted and clipped
    to it, and the result is passed through `random_perspective`.
    """
    all_labels, all_segments = [], []
    s = self.img_size
    picks = [index] + random.choices(self.indices, k=8)  # 8 additional image indices
    random.shuffle(picks)
    for tile, idx in enumerate(picks):
        # Load image
        img, _, (h, w) = load_image(self, idx)

        # Canvas rectangle (xmin, ymin, xmax, ymax) for this tile; w0/h0 are the centre image's
        # size and wp/hp the previous tile's size
        if tile == 0:  # center
            img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            h0, w0 = h, w
            c = s, s, s + w, s + h
        elif tile == 1:  # top
            c = s, s - h, s + w, s
        elif tile == 2:  # top right
            c = s + wp, s - h, s + wp + w, s
        elif tile == 3:  # right
            c = s + w0, s, s + w0 + w, s + h
        elif tile == 4:  # bottom right
            c = s + w0, s + hp, s + w0 + w, s + hp + h
        elif tile == 5:  # bottom
            c = s + w0 - w, s + h0, s + w0, s + h0 + h
        elif tile == 6:  # bottom left
            c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
        elif tile == 7:  # left
            c = s - w, s + h0 - h, s, s + h0
        elif tile == 8:  # top left
            c = s - w, s + h0 - hp - h, s, s + h0 - hp

        padx, pady = c[:2]
        x1, y1, x2, y2 = [max(v, 0) for v in c]  # allocate coords

        # Labels
        labels = self.labels[idx].copy()
        segments = self.segments[idx].copy()
        if labels.size:
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady)  # normalized xywh to pixel xyxy format
            segments = [xyn2xy(seg, w, h, padx, pady) for seg in segments]
        all_labels.append(labels)
        all_segments.extend(segments)

        # Image
        img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:]  # img9[ymin:ymax, xmin:xmax]
        hp, wp = h, w  # height, width previous

    # Offset
    yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border]  # mosaic center x, y
    img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]

    # Concat/clip labels
    labels9 = np.concatenate(all_labels, 0)
    labels9[:, [1, 3]] -= xc
    labels9[:, [2, 4]] -= yc
    offset = np.array([xc, yc])  # centers
    segments9 = [seg - offset for seg in all_segments]

    for coords in (labels9[:, 1:], *segments9):
        np.clip(coords, 0, 2 * s, out=coords)  # clip when using random_perspective()

    # Augment
    hyp = self.hyp
    img9, labels9 = random_perspective(img9, labels9, segments9,
                                       degrees=hyp['degrees'],
                                       translate=hyp['translate'],
                                       scale=hyp['scale'],
                                       shear=hyp['shear'],
                                       perspective=hyp['perspective'],
                                       border=self.mosaic_border)  # border to remove
    return img9, labels9
def create_folder(path='./new'):
    """Create an empty folder at `path`, deleting whatever already exists there first."""
    already_present = os.path.exists(path)
    if already_present:
        shutil.rmtree(path)  # delete output folder
    os.makedirs(path)  # make new output folder
def flatten_recursive(path='../datasets/coco128'):
    """Flatten a recursive directory by copying all of its files into a sibling `<path>_flat` folder.

    Args:
        path: Source directory, as a string or `Path`.

    The destination folder is recreated from scratch. Files sharing a name overwrite
    each other because only the file name is kept.
    """
    # str() so that a Path argument works too; `path + '_flat'` raises TypeError for Path objects
    new_path = Path(str(path) + '_flat')
    create_folder(new_path)
    for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
        if not os.path.isfile(file):
            continue  # the '*.*' pattern also matches directories with a dot in their name
        shutil.copyfile(file, new_path / Path(file).name)
def extract_boxes(path='../datasets/coco128'):  # from utils.datasets import *; extract_boxes()
    """Convert a detection dataset into a classification dataset, with one directory per class.

    Every labelled box is padded (x1.2 + 3 px), clipped to the image and written to
    `<path>/classifier/<class>/<dataset>_<image>_<j>.jpg`. Any existing `classifier`
    directory is removed first.

    Raises:
        AssertionError: if a crop cannot be written.
    """
    path = Path(path)  # images dir
    classifier_dir = path / 'classifier'
    if classifier_dir.is_dir():
        shutil.rmtree(classifier_dir)  # remove existing
    files = list(path.rglob('*.*'))
    n = len(files)  # number of files
    for im_file in tqdm(files, total=n):
        if im_file.suffix[1:] in IMG_FORMATS:
            # image
            # NOTE(review): the crop is written with cv2.imwrite after this channel reversal - confirm
            # that the swapped channel order in the saved files is intended.
            im = cv2.imread(str(im_file))[..., ::-1]  # BGR to RGB
            h, w = im.shape[:2]

            # labels
            lb_file = Path(img2label_paths([str(im_file)])[0])
            if Path(lb_file).exists():
                with open(lb_file, 'r') as f:
                    lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)  # labels

                for j, x in enumerate(lb):
                    c = int(x[0])  # class
                    crop_file = classifier_dir / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg'  # new filename
                    if not crop_file.parent.is_dir():
                        crop_file.parent.mkdir(parents=True)

                    b = x[1:] * [w, h, w, h]  # box
                    # b[2:] = b[2:].max()  # rectangle to square
                    b[2:] = b[2:] * 1.2 + 3  # pad
                    # builtin int: the np.int alias was removed from NumPy
                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)

                    b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                    b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                    assert cv2.imwrite(str(crop_file), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {crop_file}'
def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
    """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
    Usage: from utils.datasets import *; autosplit()
    Arguments
        path:            Path to images directory
        weights:         Train, val, test weights (list, tuple)
        annotated_only:  Only use images with an annotated txt file
    """
    path = Path(path)  # images dir
    files = []  # image files only, grouped by extension in IMG_FORMATS order
    for img_ext in IMG_FORMATS:
        files = files + list(path.rglob(f"*.{img_ext}"))
    n = len(files)  # number of files
    random.seed(0)  # for reproducibility
    indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split

    txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
    out_dir = path.parent
    for name in txt:
        (out_dir / name).unlink(missing_ok=True)  # remove existing

    print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
    for i, img in tqdm(zip(indices, files), total=n):
        if annotated_only and not Path(img2label_paths([str(img)])[0]).exists():  # check label
            continue
        with open(out_dir / txt[i], 'a') as f:
            f.write('./' + img.relative_to(out_dir).as_posix() + '\n')  # add image to txt file
def verify_image_label(args):
    """Verify one image-label pair.

    `args` is `(im_file, lb_file, prefix)`. On success returns the tuple
    `(im_file, labels, shape, segments, nm, nf, ne, nc, msg)`, where nm/nf/ne/nc are
    0/1 flags for label missing / found / empty / corrupt. Any exception is caught and
    reported as a corrupt pair: a list with four leading `None`s, `nc = 1` and a warning.
    """
    im_file, lb_file, prefix = args
    nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', []  # number (missing, found, empty, corrupt), message, segments
    try:
        # verify images
        im = Image.open(im_file)
        im.verify()  # PIL verify
        shape = exif_size(im)  # image size
        assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
        fmt = im.format.lower()
        assert fmt in IMG_FORMATS, f'invalid image format {im.format}'
        if fmt in ('jpg', 'jpeg'):
            with open(im_file, 'rb') as f:
                f.seek(-2, 2)  # the last two bytes must be the JPEG end-of-image marker
                tail = f.read()
                if tail != b'\xff\xd9':  # corrupt JPEG
                    Image.open(im_file).save(im_file, format='JPEG', subsampling=0, quality=100)  # re-save image
                    msg = f'{prefix}WARNING: corrupt JPEG restored and saved {im_file}'

        # verify labels
        if not os.path.isfile(lb_file):
            nm = 1  # label missing
            l = np.zeros((0, 5), dtype=np.float32)
        else:
            nf = 1  # label found
            with open(lb_file, 'r') as f:
                l = [row.split() for row in f.read().strip().splitlines() if len(row)]
                if any([len(row) > 8 for row in l]):  # is segment
                    classes = np.array([row[0] for row in l], dtype=np.float32)
                    segments = [np.array(row[1:], dtype=np.float32).reshape(-1, 2) for row in l]  # (cls, xy1...)
                    l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
                l = np.array(l, dtype=np.float32)
            if len(l):
                assert l.shape[1] == 5, 'labels require 5 columns each'
                assert (l >= 0).all(), 'negative labels'
                assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
                assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels'
            else:
                ne = 1  # label empty
                l = np.zeros((0, 5), dtype=np.float32)
        return im_file, l, shape, segments, nm, nf, ne, nc, msg
    except Exception as e:
        nc = 1
        msg = f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}'
        return [None, None, None, None, nm, nf, ne, nc, msg]
def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profile=False, hub=False):
    """ Return dataset statistics dictionary with images and instances counts per split per class
    To run in parent directory: export PYTHONPATH="$PWD/yolov5"
    Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True)
    Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip')
    Arguments
        path:           Path to data.yaml or data.zip (with data.yaml inside data.zip)
        autodownload:   Attempt to download dataset if not found locally
        verbose:        Print stats dictionary
        profile:        Time saving/loading the stats as *.npy and *.json
        hub:            Write resized images and stats.json into a '<path>-hub' directory
    """

    def round_labels(labels):
        # Update labels to integer class and 4 decimal place floats
        return [[int(c), *[round(x, 4) for x in points]] for c, *points in labels]

    def unzip(path):
        # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/'
        if str(path).endswith('.zip'):  # path is data.zip
            assert Path(path).is_file(), f'Error unzipping {path}, file not found'
            with ZipFile(path) as archive:  # close the archive handle deterministically
                archive.extractall(path=path.parent)  # unzip
            data_root = path.with_suffix('')  # dataset directory == zip name
            return True, str(data_root), next(data_root.rglob('*.yaml'))  # zipped, data_dir, yaml_path
        else:  # path is data.yaml
            return False, None, path

    def hub_ops(f, max_dim=1920):
        # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing
        f_new = im_dir / Path(f).name  # dataset-hub image filename; im_dir is set per split below
        try:  # use PIL
            im = Image.open(f)
            r = max_dim / max(im.height, im.width)  # ratio
            if r < 1.0:  # image too large
                im = im.resize((int(im.width * r), int(im.height * r)))
            im.save(f_new, quality=75)  # save
        except Exception as e:  # use OpenCV
            print(f'WARNING: HUB ops PIL failure {f}: {e}')
            im = cv2.imread(f)
            im_height, im_width = im.shape[:2]
            r = max_dim / max(im_height, im_width)  # ratio
            if r < 1.0:  # image too large
                im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_LINEAR)
            cv2.imwrite(str(f_new), im)

    zipped, data_dir, yaml_path = unzip(Path(path))
    with open(check_yaml(yaml_path), errors='ignore') as f:
        data = yaml.safe_load(f)  # data dict
        if zipped:
            data['path'] = data_dir  # TODO: should this be dir.resolve()?
    check_dataset(data, autodownload)  # download dataset if missing
    hub_dir = Path(data['path'] + ('-hub' if hub else ''))
    stats = {'nc': data['nc'], 'names': data['names']}  # statistics dictionary
    for split in 'train', 'val', 'test':
        if data.get(split) is None:
            stats[split] = None  # i.e. no test set
            continue
        x = []
        dataset = LoadImagesAndLabels(data[split])  # load dataset
        for label in tqdm(dataset.labels, total=dataset.n, desc='Statistics'):
            x.append(np.bincount(label[:, 0].astype(int), minlength=data['nc']))  # per-image class counts
        x = np.array(x)  # shape(128x80)
        stats[split] = {'instance_stats': {'total': int(x.sum()), 'per_class': x.sum(0).tolist()},
                        'image_stats': {'total': dataset.n, 'unlabelled': int(np.all(x == 0, 1).sum()),
                                        'per_class': (x > 0).sum(0).tolist()},
                        'labels': [{str(Path(k).name): round_labels(v.tolist())} for k, v in
                                   zip(dataset.img_files, dataset.labels)]}

        if hub:
            im_dir = hub_dir / 'images'
            im_dir.mkdir(parents=True, exist_ok=True)
            for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), total=dataset.n, desc='HUB Ops'):
                pass

    # Profile
    stats_path = hub_dir / 'stats.json'
    if profile:
        file = stats_path.with_suffix('.npy')
        t1 = time.time()
        np.save(file, stats)
        t2 = time.time()
        x = np.load(file, allow_pickle=True)
        print(f'stats.npy times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write')

        file = stats_path.with_suffix('.json')
        t1 = time.time()
        with open(file, 'w') as f:
            json.dump(stats, f)  # save stats *.json
        t2 = time.time()
        with open(file, 'r') as f:
            x = json.load(f)  # load hyps dict
        print(f'stats.json times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write')

    # Save, print and return
    if hub:
        print(f'Saving {stats_path.resolve()}...')
        with open(stats_path, 'w') as f:
            json.dump(stats, f)  # save stats.json
    if verbose:
        print(json.dumps(stats, indent=2, sort_keys=False))
    return stats
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Download utils
"""
import os
import platform
import subprocess
import time
import urllib
from pathlib import Path
from zipfile import ZipFile
import requests
import torch
def gsutil_getsize(url=''):
    """Return the size in bytes reported by `gsutil du` for `url`, or 0 when it prints nothing.

    gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du

    Raises:
        subprocess.CalledProcessError: if the gsutil command exits with a non-zero status.
    """
    # NOTE(review): `url` is interpolated into a shell command; only pass trusted values.
    s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8')
    # int() rather than eval(): the first field is a byte count and command output must never be executed
    return int(s.split(' ')[0]) if len(s) else 0  # bytes
def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
    """Attempt to download `file` from `url`, falling back to curl on `url2` (or `url` again).

    Args:
        file: Destination path.
        url: Primary URL, fetched with `torch.hub.download_url_to_file`.
        url2: Optional backup URL for the curl retry.
        min_bytes: A result smaller than this is treated as incomplete and removed.
        error_msg: Extra text printed when the download ultimately fails.

    Failures are printed, not raised.
    """
    file = Path(file)
    assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
    try:  # url1
        print(f'Downloading {url} to {file}...')
        torch.hub.download_url_to_file(url, str(file))
        assert file.exists() and file.stat().st_size > min_bytes, assert_msg  # check
    except Exception as e:  # url2
        file.unlink(missing_ok=True)  # remove partial downloads
        print(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
        # Argument list instead of a shell string: no quoting problems with URLs or paths containing
        # quotes or spaces, and no reliance on single quotes (which cmd.exe does not understand).
        curl_cmd = ['curl', '-L', url2 or url, '-o', str(file), '--retry', '3', '-C', '-']  # retry and resume on fail
        try:
            subprocess.run(curl_cmd, check=False)
        except OSError as curl_error:  # curl not installed or not executable: stay best-effort
            print(f'ERROR: could not run curl: {curl_error}')
    finally:
        if not file.exists() or file.stat().st_size < min_bytes:  # check
            file.unlink(missing_ok=True)  # remove partial downloads
            print(f"ERROR: {assert_msg}\n{error_msg}")
        print('')
def attempt_download(file, repo='ultralytics/yolov5'):  # from utils.downloads import *; attempt_download()
    """Attempt to download `file` if it does not exist locally.

    Args:
        file: Local path, or an http(s) URL.
        repo: GitHub repository whose latest release assets are searched.

    Returns:
        For URLs, the bare file name the download was saved under; otherwise `str(file)`
        after stripping whitespace and single quotes.
    """
    # Explicit submodule import: a bare `import urllib` does not guarantee `urllib.parse` is loaded
    from urllib.parse import unquote

    file = Path(str(file).strip().replace("'", ''))

    if not file.exists():
        # URL specified
        name = Path(unquote(str(file))).name  # decode '%2F' to '/' etc.
        if str(file).startswith(('http:/', 'https:/')):  # download
            url = str(file).replace(':/', '://')  # Pathlib turns :// -> :/
            name = name.split('?')[0]  # parse authentication https://url.com/file.txt?auth...
            safe_download(file=name, url=url, min_bytes=1E5)
            return name

        # GitHub assets
        file.parent.mkdir(parents=True, exist_ok=True)  # make parent dir (if required)
        try:
            response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json()  # github api
            assets = [x['name'] for x in response['assets']]  # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...]
            tag = response['tag_name']  # i.e. 'v1.0'
        except Exception:  # fallback plan (KeyboardInterrupt/SystemExit are no longer swallowed)
            assets = ['yolov5n.pt', 'yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt',
                      'yolov5n6.pt', 'yolov5s6.pt', 'yolov5m6.pt', 'yolov5l6.pt', 'yolov5x6.pt']
            try:
                tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
            except Exception:  # not a git checkout, git missing, or no tags
                tag = 'v6.0'  # current release

        if name in assets:
            safe_download(file,
                          url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
                          # url2=f'https://storage.googleapis.com/{repo}/ckpt/{name}',  # backup url (optional)
                          min_bytes=1E5,
                          error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/')

    return str(file)
def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
    """Download a file from Google Drive with curl. from yolov5.utils.downloads import *; gdrive_download()

    Args:
        id: Google Drive file id.
        file: Destination path; a *.zip is extracted next to itself and then deleted.

    Returns:
        The exit status of the curl download command (0 on success).
    """
    t = time.time()
    file = Path(file)
    cookie = Path('cookie')  # gdrive cookie
    print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='')
    file.unlink(missing_ok=True)  # remove existing file
    cookie.unlink(missing_ok=True)  # remove existing cookie

    # Attempt file download
    out = "NUL" if platform.system() == "Windows" else "/dev/null"
    os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}')
    if os.path.exists('cookie'):  # large file
        s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}'
    else:  # small file
        s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"'
    r = os.system(s)  # execute, capture return
    cookie.unlink(missing_ok=True)  # remove existing cookie

    # Error check
    if r != 0:
        file.unlink(missing_ok=True)  # remove partial
        print('Download error ')  # raise Exception('Download error')
        return r

    # Unzip if archive
    if file.suffix == '.zip':
        print('unzipping... ', end='')
        # Close the archive before deleting it instead of relying on garbage collection
        with ZipFile(file) as archive:
            archive.extractall(path=file.parent)  # unzip
        file.unlink()  # remove zip

    print(f'Done ({time.time() - t:.1f}s)')
    return r
def get_token(cookie="./cookie"):
    """Return the last whitespace-separated field of the first cookie-file line containing "download".

    An empty string is returned when no line of the file contains "download".
    """
    with open(cookie) as handle:
        first_match = next((row for row in handle if "download" in row), None)
    if first_match is None:
        return ""
    return first_match.split()[-1]
# Google utils: https://cloud.google.com/storage/docs/reference/libraries ----------------------------------------------
#
#
# def upload_blob(bucket_name, source_file_name, destination_blob_name):
# # Uploads a file to a bucket
# # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
#
# storage_client = storage.Client()
# bucket = storage_client.get_bucket(bucket_name)
# blob = bucket.blob(destination_blob_name)
#
# blob.upload_from_filename(source_file_name)
#
# print('File {} uploaded to {}.'.format(
# source_file_name,
# destination_blob_name))
#
#
# def download_blob(bucket_name, source_blob_name, destination_file_name):
# # Downloads a blob from a bucket
# storage_client = storage.Client()
# bucket = storage_client.get_bucket(bucket_name)
# blob = bucket.blob(source_blob_name)
#
# blob.download_to_filename(destination_file_name)
#
# print('Blob {} downloaded to {}.'.format(
# source_blob_name,
# destination_file_name))
# Flask REST API
[REST](https://en.wikipedia.org/wiki/Representational_state_transfer) [API](https://en.wikipedia.org/wiki/API)s are
commonly used to expose Machine Learning (ML) models to other services. This folder contains an example REST API
created using Flask to expose the YOLOv5s model from [PyTorch Hub](https://pytorch.org/hub/ultralytics_yolov5/).
## Requirements
[Flask](https://palletsprojects.com/p/flask/) is required. Install with:
```shell
$ pip install Flask
```
## Run
After installing Flask, start the API server:
```shell
$ python3 restapi.py --port 5000
```
Then use [curl](https://curl.se/) to perform a request:
```shell
$ curl -X POST -F image=@zidane.jpg 'http://localhost:5000/v1/object-detection/yolov5s'
```
The model inference results are returned as a JSON response:
```json
[
{
"class": 0,
"confidence": 0.8900438547,
"height": 0.9318675399,
"name": "person",
"width": 0.3264600933,
"xcenter": 0.7438579798,
"ycenter": 0.5207948685
},
{
"class": 0,
"confidence": 0.8440024257,
"height": 0.7155083418,
"name": "person",
"width": 0.6546785235,
"xcenter": 0.427829951,
"ycenter": 0.6334488392
},
{
"class": 27,
"confidence": 0.3771208823,
"height": 0.3902671337,
"name": "tie",
"width": 0.0696444362,
"xcenter": 0.3675483763,
"ycenter": 0.7991207838
},
{
"class": 27,
"confidence": 0.3527112305,
"height": 0.1540903747,
"name": "tie",
"width": 0.0336618312,
"xcenter": 0.7814827561,
"ycenter": 0.5065554976
}
]
```
An example Python script that performs inference using [requests](https://docs.python-requests.org/en/master/) is
provided in `example_request.py`.
"""Perform test request"""
import pprint

import requests

# Endpoint served by restapi.py and the image sent to it
DETECTION_URL = "http://localhost:5000/v1/object-detection/yolov5s"
TEST_IMAGE = "zidane.jpg"

# Read the image inside a context manager so the file handle is closed promptly
with open(TEST_IMAGE, "rb") as image_file:
    image_data = image_file.read()

# POST the raw bytes as the multipart form field "image" and decode the JSON reply
response = requests.post(DETECTION_URL, files={"image": image_data}).json()

pprint.pprint(response)
"""
Run a rest API exposing the yolov5s object detection model
"""
import argparse
import io
import torch
from PIL import Image
from flask import Flask, request
# Flask application object on which the detection route is registered
app = Flask(__name__)

# URL path of the object-detection endpoint
DETECTION_URL = "/v1/object-detection/yolov5s"
@app.route(DETECTION_URL, methods=["POST"])
def predict():
    """Run the detection model on an uploaded image and return the detections as JSON.

    The image is read from the multipart form field "image".

    Returns:
        A JSON string (one record per detection) on success, or an error message
        with HTTP status 400 when no "image" file was uploaded.
    """
    if request.method != "POST":  # the route only registers POST; kept as a defensive guard
        return "Only POST requests are supported", 405

    image_file = request.files.get("image")
    if not image_file:
        # A view that returns None makes Flask raise a TypeError (HTTP 500), so answer explicitly
        return "Missing 'image' file in request", 400

    image_bytes = image_file.read()
    img = Image.open(io.BytesIO(image_bytes))

    results = model(img, size=640)  # reduce size=320 for faster inference
    return results.pandas().xyxy[0].to_json(orient="records")
if __name__ == "__main__":
    # Command-line entry point: parse the port, load the model, then start the Flask server
    parser = argparse.ArgumentParser(description="Flask API exposing YOLOv5 model")
    parser.add_argument("--port", default=5000, type=int, help="port number")
    args = parser.parse_args()

    # `model` is assigned at module level here and read by predict() at request time
    model = torch.hub.load("ultralytics/yolov5", "yolov5s", force_reload=True)  # force_reload to recache
    # host="0.0.0.0" makes the server listen on all network interfaces
    app.run(host="0.0.0.0", port=args.port)  # debug=True causes Restarting with stat
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
General utils
"""
import contextlib
import glob
import logging
import math
import os
import platform
import random
import re
import signal
import time
import urllib
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
from subprocess import check_output
from zipfile import ZipFile
import cv2
import numpy as np
import pandas as pd
import pkg_resources as pkg
import torch
import torchvision
import yaml
from yolov5processor.utils.downloads import gsutil_getsize
from yolov5processor.utils.metrics import box_iou, fitness
# Settings
torch.set_printoptions(linewidth=320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5
pd.options.display.max_columns = 10
cv2.setNumThreads(0)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
# os.cpu_count() may return None when the count cannot be determined, so fall back to 1
os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count() or 1, 8))  # NumExpr max threads

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
class Profile(contextlib.ContextDecorator):
    """Wall-clock timer. Usage: @Profile() decorator or 'with Profile():' context manager."""

    def __enter__(self):
        # Remember when the timed section started
        self.start = time.time()

    def __exit__(self, exc_type, exc_value, exc_tb):
        # Report the elapsed seconds; returns None so any exception propagates
        elapsed = time.time() - self.start
        print(f'Profile results: {elapsed:.5f}s')
class Timeout(contextlib.ContextDecorator):
    """Raise TimeoutError inside the managed block after a number of seconds, using SIGALRM.

    Usage: @Timeout(seconds) decorator or 'with Timeout(seconds):' context manager.

    Args:
        seconds: countdown length; converted with int().
        timeout_msg: message carried by the TimeoutError.
        suppress_timeout_errors: when True, a TimeoutError leaving the block is swallowed.

    On platforms where the signal module has no SIGALRM (e.g. Windows) no alarm is
    armed, so the block simply runs without a time limit instead of crashing.
    """

    def __init__(self, seconds, *, timeout_msg='', suppress_timeout_errors=True):
        self.seconds = int(seconds)
        self.timeout_message = timeout_msg
        self.suppress = bool(suppress_timeout_errors)

    def _timeout_handler(self, signum, frame):
        # Signal handler: turn the alarm into a Python exception
        raise TimeoutError(self.timeout_message)

    def __enter__(self):
        if hasattr(signal, 'SIGALRM'):  # SIGALRM is not available on every platform
            signal.signal(signal.SIGALRM, self._timeout_handler)  # Set handler for SIGALRM
            signal.alarm(self.seconds)  # start countdown for SIGALRM to be raised

    def __exit__(self, exc_type, exc_val, exc_tb):
        if hasattr(signal, 'SIGALRM'):
            signal.alarm(0)  # Cancel SIGALRM if it's scheduled
        if self.suppress and exc_type is TimeoutError:  # Suppress TimeoutError
            return True
def try_except(func):
    """Decorator that prints any Exception raised by func instead of propagating it. Usage: @try_except

    The wrapper returns func's result, or None when an exception was caught and printed.
    """
    from functools import wraps  # local import keeps this block self-contained

    @wraps(func)  # preserve the wrapped function's name and docstring
    def handler(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print(e)

    return handler
def methods(instance):
    """Return the names of callable attributes of instance that do not start with a double underscore."""
    names = []
    for attr in dir(instance):
        is_callable = callable(getattr(instance, attr))
        if is_callable and not attr.startswith("__"):
            names.append(attr)
    return names
def set_logging(rank=-1, verbose=True):
    """Configure logging via basicConfig: INFO when verbose and rank is -1 or 0, otherwise WARN."""
    if verbose and rank in [-1, 0]:
        level = logging.INFO
    else:
        level = logging.WARN
    logging.basicConfig(format="%(message)s", level=level)
def print_args(name, opt):
    """Print the attributes of opt (read with vars()) as 'key=value' pairs after a colored 'name: ' prefix."""
    header = colorstr(f'{name}: ')
    pairs = [f'{key}={value}' for key, value in vars(opt).items()]
    print(header + ', '.join(pairs))
def init_seeds(seed=0):
    """Seed the Python, NumPy and PyTorch RNGs and set the cuDNN flags.

    See https://pytorch.org/docs/stable/notes/randomness.html
    seed == 0 selects benchmark=False, deterministic=True (slower, more reproducible);
    any other seed selects benchmark=True, deterministic=False (faster, less reproducible).
    """
    import torch.backends.cudnn as cudnn

    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if seed == 0:
        cudnn.benchmark, cudnn.deterministic = False, True
    else:
        cudnn.benchmark, cudnn.deterministic = True, False
def get_latest_run(search_dir='.'):
    """Return the 'last*.pt' path under search_dir with the greatest os.path.getctime, or '' if none exist.

    The search is recursive; the result is the path to --resume from.
    """
    candidates = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
    if not candidates:
        return ''
    return max(candidates, key=os.path.getctime)
def user_config_dir(dir='Ultralytics', env_var='YOLOV5_CONFIG_DIR'):
    """Return path of user configuration directory, creating it if required.

    Args:
        dir: sub-directory name appended to the OS-specific config location.
        env_var: environment variable that, when set and non-empty, is used as the full path.

    Returns:
        pathlib.Path of the (now existing) configuration directory.
    """
    env = os.getenv(env_var)
    if env:
        path = Path(env)  # use environment variable
    else:
        cfg = {'Windows': 'AppData/Roaming', 'Linux': '.config', 'Darwin': 'Library/Application Support'}  # 3 OS dirs
        path = Path.home() / cfg.get(platform.system(), '')  # OS-specific config dir
        path = (path if is_writeable(path) else Path('/tmp')) / dir  # GCP and AWS lambda fix, only /tmp is writeable
    # parents=True so a nested path supplied through the environment variable can be created
    path.mkdir(parents=True, exist_ok=True)  # make if required
    return path
def is_writeable(dir, test=False):
    """Return True if directory has write permissions.

    Args:
        dir: directory to check.
        test: when True, verify by actually creating and removing 'tmp.txt' inside dir;
            when False, ask the OS via os.access().
    """
    if test:  # method 1
        file = Path(dir) / 'tmp.txt'
        try:
            with open(file, 'w'):  # open file with write permissions
                pass
            file.unlink()  # remove file
            return True
        except IOError:
            return False
    else:  # method 2
        # W_OK (not R_OK): the question is whether the directory can be written to
        return os.access(dir, os.W_OK)  # possible issues on Windows
def is_docker():
    """Return True when '/workspace' exists, which this code treats as the sign of a Docker container."""
    workspace = Path('/workspace')
    return workspace.exists()  # or Path('/.dockerenv').exists()
def is_colab():
    """Return True if the google.colab module can be imported (Google Colab instance), else False."""
    try:
        import google.colab  # noqa: F401 -- imported only to probe availability
    except ImportError:
        return False
    return True
def is_pip():
    """Return True when the resolved path of this file has a 'site-packages' component."""
    this_file = Path(__file__).resolve()
    return 'site-packages' in this_file.parts
def is_ascii(s=''):
    """Return True if str(s) consists only of ASCII characters.

    Note str().isascii() was introduced in python 3.7. Any object is accepted
    because it is converted with str() first (list, tuple, None, etc.).
    """
    text = str(s)
    ascii_only = text.encode().decode('ascii', 'ignore')  # non-ASCII bytes are dropped
    return len(ascii_only) == len(text)
def is_chinese(s='人工智能'):
    """Return a regex match object if s contains a character in the range U+4E00-U+9FFF, otherwise None."""
    cjk = re.compile('[\u4e00-\u9fff]')
    return cjk.search(s)
def emojis(str=''):
    """Return platform-dependent emoji-safe version of string.

    On Windows all non-ASCII characters are dropped; elsewhere the string is returned unchanged.
    """
    if platform.system() != 'Windows':
        return str
    return str.encode().decode('ascii', 'ignore')
def file_size(path):
    """Return file/dir size in MB (bytes / 1E6).

    For a directory the sizes of all files found recursively are summed.
    A path that is neither a file nor a directory yields 0.0.
    """
    target = Path(path)
    if target.is_file():
        return target.stat().st_size / 1E6
    if target.is_dir():
        total_bytes = sum(entry.stat().st_size for entry in target.glob('**/*') if entry.is_file())
        return total_bytes / 1E6
    return 0.0
def check_online():
    """Check internet connectivity by opening a TCP connection to 1.1.1.1:443 (5 second timeout).

    Returns:
        True if the connection could be established, False if an OSError occurred.
    """
    import socket
    try:
        connection = socket.create_connection(("1.1.1.1", 443), 5)  # check host accessibility
    except OSError:
        return False
    connection.close()  # release the probe socket instead of leaking it
    return True
@try_except
def check_git_status():
    """Recommend 'git pull' if code is out of date.

    The check is skipped (the assertion message is printed by @try_except) when the
    working directory is not a git repository, inside Docker, or when offline.
    """
    msg = ', for updates see https://github.com/ultralytics/yolov5'
    print(colorstr('github: '), end='')
    assert Path('.git').exists(), 'skipping check (not a git repository)' + msg
    assert not is_docker(), 'skipping check (Docker image)' + msg
    assert check_online(), 'skipping check (offline)' + msg

    cmd = 'git fetch && git config --get remote.origin.url'
    url = check_output(cmd, shell=True, timeout=5).decode().strip()  # git fetch
    if url.endswith('.git'):
        # str.rstrip('.git') strips any trailing '.', 'g', 'i', 't' characters, not the suffix
        url = url[:-len('.git')]
    branch = check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip()  # checked out
    n = int(check_output(f'git rev-list {branch}..origin/master --count', shell=True))  # commits behind
    if n > 0:
        s = f"⚠️ YOLOv5 is out of date by {n} commit{'s' * (n > 1)}. Use `git pull` or `git clone {url}` to update."
    else:
        s = f'up to date with {url} ✅'
    print(emojis(s))  # emoji-safe
def check_python(minimum='3.6.2'):
    """Check the running Python version against `minimum` by delegating to check_version()."""
    running = platform.python_version()
    check_version(running, minimum, name='Python ')
def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=False):
    """Assert that `current` satisfies `minimum`.

    Both version strings are parsed with pkg.parse_version. With pinned=True the
    versions must be equal, otherwise current must be >= minimum.

    Raises:
        AssertionError: when the requirement is not met.
    """
    current = pkg.parse_version(current)
    minimum = pkg.parse_version(minimum)
    if pinned:
        result = current == minimum
    else:
        result = current >= minimum
    assert result, f'{name}{minimum} required by YOLOv5, but {name}{current} is currently installed'
@try_except
def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True):
    """Check installed dependencies meet requirements (pass *.txt file or list of packages).

    Args:
        requirements: path to a requirements file, or a list/tuple of requirement strings.
        exclude: package names (file input) or requirement strings (list input) to skip.
        install: when True, try `pip install` for every unmet requirement.
    """
    prefix = colorstr('red', 'bold', 'requirements:')
    check_python()  # check python version
    file = None  # stays None when requirements is given as a list/tuple
    if isinstance(requirements, (str, Path)):  # requirements.txt file
        file = Path(requirements)
        assert file.exists(), f"{prefix} {file.resolve()} not found, check failed."
        with file.open() as f:  # close the file once it has been parsed
            requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude]
    else:  # list or tuple of packages
        requirements = [x for x in requirements if x not in exclude]

    n = 0  # number of packages updates
    for r in requirements:
        try:
            pkg.require(r)
        except Exception:  # DistributionNotFound or VersionConflict if requirements not met
            s = f"{prefix} {r} not found and is required by YOLOv5"
            if install:
                print(f"{s}, attempting auto-update...")
                try:
                    assert check_online(), f"'pip install {r}' skipped (offline)"
                    print(check_output(f"pip install '{r}'", shell=True).decode())
                    n += 1
                except Exception as e:
                    print(f'{prefix} {e}')
            else:
                print(f'{s}. Please install and rerun your command.')

    if n:  # if packages updated
        source = file.resolve() if file is not None else requirements
        s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \
            f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n"
        print(emojis(s))
def check_img_size(imgsz, s=32, floor=0):
    """Verify image size is a multiple of stride s in each dimension.

    Args:
        imgsz: an int (i.e. img_size=640) or an iterable of sizes (i.e. img_size=[640, 480]).
        s: stride; converted with int() before use.
        floor: lower bound applied to every adjusted size.

    Returns:
        The adjusted size (int for int input, list otherwise); a warning is printed if it changed.
    """

    def _fit(value):
        # Round up to a stride multiple, but never below floor
        return max(make_divisible(value, int(s)), floor)

    new_size = _fit(imgsz) if isinstance(imgsz, int) else [_fit(x) for x in imgsz]
    if new_size != imgsz:
        print(f'WARNING: --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}')
    return new_size
def check_imshow():
    """Check if environment supports image displays.

    Returns:
        True when a 1x1 test window could be shown and closed with OpenCV; False (after
        printing a warning) when running in Docker/Colab or when any step raised.
    """
    try:
        # The asserts raise inside the try so that unsupported environments fall into the except branch
        assert not is_docker(), 'cv2.imshow() is disabled in Docker environments'
        assert not is_colab(), 'cv2.imshow() is disabled in Google Colab environments'
        cv2.imshow('test', np.zeros((1, 1, 3)))
        cv2.waitKey(1)
        cv2.destroyAllWindows()
        cv2.waitKey(1)
        return True
    except Exception as e:
        print(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}')
        return False
def check_suffix(file='yolov5s.pt', suffix=('.pt',), msg=''):
    """Check file(s) for acceptable suffixes.

    Args:
        file: a single path or a list/tuple of paths; nothing is checked when falsy.
        suffix: one suffix string or a collection of suffixes; nothing is checked when falsy.
        msg: text prepended to the assertion message.

    Raises:
        AssertionError: when the lower-cased suffix of a file is not in `suffix`.
    """
    if not (file and suffix):
        return
    allowed = [suffix] if isinstance(suffix, str) else suffix
    candidates = file if isinstance(file, (list, tuple)) else [file]
    for f in candidates:
        assert Path(f).suffix.lower() in allowed, f"{msg}{f} acceptable suffix is {allowed}"
def check_yaml(file, suffix=('.yaml', '.yml')):
    """Search/download YAML file (if necessary) and return path, checking suffix.

    Thin wrapper around check_file() with YAML suffixes as the default.
    """
    resolved = check_file(file, suffix)
    return resolved
def check_file(file, suffix=''):
    """Search/download file (if necessary) and return path.

    Args:
        file: local path, http(s) URL, or a file name to search for under ROOT's
            'data', 'models' and 'utils' directories.
        suffix: acceptable suffix(es), validated with check_suffix() (optional).

    Returns:
        The path as str: unchanged if it exists or is '', the downloaded file name
        for a URL, or the single search match.

    Raises:
        AssertionError: on a bad suffix, a failed download, or zero/multiple search matches.
    """
    # Import the submodule explicitly: plain `import urllib` does not guarantee `urllib.parse` is loaded
    from urllib.parse import unquote

    check_suffix(file, suffix)  # optional
    file = str(file)  # convert to str()
    if Path(file).is_file() or file == '':  # exists
        return file
    elif file.startswith(('http:/', 'https:/')):  # download
        url = str(Path(file)).replace(':/', '://')  # Pathlib turns :// -> :/
        file = Path(unquote(file).split('?')[0]).name  # '%2F' to '/', split https://url.com/file.txt?auth
        print(f'Downloading {url} to {file}...')
        torch.hub.download_url_to_file(url, file)
        assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}'  # check
        return file
    else:  # search
        files = []
        for d in 'data', 'models', 'utils':  # search directories
            files.extend(glob.glob(str(ROOT / d / '**' / file), recursive=True))  # find file
        assert len(files), f'File not found: {file}'  # assert file was found
        assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}"  # assert unique
        return files[0]  # return file
def check_dataset(data, autodownload=True):
    """Download and/or unzip dataset if not found locally, and return the dataset dictionary.

    Usage: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128_with_yaml.zip

    Args:
        data: dataset dictionary, path to a dataset YAML, or path/URL of a .zip containing one.
        autodownload: when True and the 'val' paths are missing, run the YAML 'download' entry.

    Returns:
        The dataset dictionary with 'train'/'val'/'test' prefixed by 'path' and 'names' filled in.

    Raises:
        AssertionError: when the 'nc' key is missing.
        Exception: when 'val' paths do not exist and no autodownload is possible.
    """
    # Download (optional)
    extract_dir = ''
    if isinstance(data, (str, Path)) and str(data).endswith('.zip'):  # i.e. gs://bucket/dir/coco128.zip
        download(data, dir='../datasets', unzip=True, delete=False, curl=False, threads=1)
        data = next((Path('../datasets') / Path(data).stem).rglob('*.yaml'))
        extract_dir, autodownload = data.parent, False

    # Read yaml (optional)
    if isinstance(data, (str, Path)):
        with open(data, errors='ignore') as f:
            data = yaml.safe_load(f)  # dictionary

    # Parse yaml
    path = extract_dir or Path(data.get('path') or '')  # optional 'path' default to '.'
    for k in 'train', 'val', 'test':
        if data.get(k):  # prepend path
            data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]

    assert 'nc' in data, "Dataset 'nc' key missing."
    if 'names' not in data:
        data['names'] = [f'class{i}' for i in range(data['nc'])]  # assign class names if missing
    train, val, test, s = [data.get(x) for x in ('train', 'val', 'test', 'download')]
    if val:
        val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])]  # val path
        if not all(x.exists() for x in val):
            print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
            if s and autodownload:  # download script
                root = path.parent if 'path' in data else '..'  # unzip directory i.e. '../'
                if s.startswith('http') and s.endswith('.zip'):  # URL
                    f = Path(s).name  # filename
                    print(f'Downloading {s} to {f}...')
                    torch.hub.download_url_to_file(s, f)
                    Path(root).mkdir(parents=True, exist_ok=True)  # create root
                    with ZipFile(f) as archive:  # close the handle before the zip is deleted
                        archive.extractall(path=root)  # unzip
                    Path(f).unlink()  # remove zip
                    r = None  # success
                elif s.startswith('bash '):  # bash script
                    # NOTE(security): runs a shell command taken from the dataset YAML; use trusted YAML only
                    print(f'Running {s} ...')
                    r = os.system(s)
                else:  # python script
                    # NOTE(security): exec() runs arbitrary Python from the dataset YAML; use trusted YAML only
                    r = exec(s, {'yaml': data})  # return None
                print(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}\n")
            else:
                raise Exception('Dataset not found.')

    return data  # dictionary
def url2file(url):
    """Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt"""
    # Import the submodule explicitly: plain `import urllib` does not guarantee `urllib.parse` is loaded
    from urllib.parse import unquote

    url = str(Path(url)).replace(':/', '://')  # Pathlib turns :// -> :/
    file = Path(unquote(url)).name.split('?')[0]  # '%2F' to '/', split https://url.com/file.txt?auth
    return file
def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
    """Multi-threaded file download and unzip function, used in data.yaml for autodownload.

    Args:
        url: a single URL/path (str or Path) or an iterable of them.
        dir: destination directory, created if missing.
        unzip: extract '.zip' / '.gz' files after download.
        delete: remove the archive after extraction.
        curl: download with curl (retry and resume) instead of torch.hub.
        threads: number of worker threads; values > 1 use a ThreadPool.
    """

    def download_one(url, dir):
        # Download 1 file
        f = dir / Path(url).name  # filename
        if Path(url).is_file():  # exists in current path
            Path(url).rename(f)  # move to dir
        elif not f.exists():
            print(f'Downloading {url} to {f}...')
            if curl:
                os.system(f"curl -L '{url}' -o '{f}' --retry 9 -C -")  # curl download, retry and resume on fail
            else:
                torch.hub.download_url_to_file(url, f, progress=True)  # torch download
        if unzip and f.suffix in ('.zip', '.gz'):
            print(f'Unzipping {f}...')
            if f.suffix == '.zip':
                with ZipFile(f) as archive:  # close the handle before the archive may be deleted
                    archive.extractall(path=dir)  # unzip
            elif f.suffix == '.gz':
                os.system(f'tar xfz {f} --directory {f.parent}')  # unzip
            if delete:
                f.unlink()  # remove zip

    dir = Path(dir)
    dir.mkdir(parents=True, exist_ok=True)  # make directory
    # Normalize first so that a lone string is never iterated character by character
    urls = [url] if isinstance(url, (str, Path)) else url
    if threads > 1:
        pool = ThreadPool(threads)
        pool.imap(lambda x: download_one(*x), zip(urls, repeat(dir)))  # multi-threaded
        pool.close()
        pool.join()
    else:
        for u in urls:
            download_one(u, dir)
def make_divisible(x, divisor):
    """Round x up to a multiple of divisor: ceil(x / divisor) * divisor."""
    multiples = math.ceil(x / divisor)
    return multiples * divisor
def clean_str(s):
    """Return s with every special character of the pattern below replaced by an underscore."""
    special = re.compile("[|@#!¡·$€%&()=?¿^*;:,¨´><+]")
    return special.sub("_", s)
def one_cycle(y1=0.0, y2=1.0, steps=100):
    """Return a function for a sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf

    The returned callable maps x to y1 at x=0 and to y2 at x=steps.
    """

    def ramp(x):
        blend = (1 - math.cos(x * math.pi / steps)) / 2  # 0 at x=0, 1 at x=steps
        return blend * (y2 - y1) + y1

    return ramp
def colorstr(*input):
    """Color a string with ANSI escape codes, i.e. colorstr('blue', 'hello world').

    https://en.wikipedia.org/wiki/ANSI_escape_code
    The last positional argument is the text, the preceding ones are color/style names.
    With a single argument the style defaults to ('blue', 'bold').
    """
    if len(input) > 1:
        *args, string = input  # color arguments, string
    else:
        args, string = ['blue', 'bold'], input[0]

    base_names = ('black', 'red', 'green', 'yellow', 'blue', 'magenta', 'cyan', 'white')
    colors = {name: f'\033[{30 + i}m' for i, name in enumerate(base_names)}  # basic colors: codes 30-37
    colors.update({f'bright_{name}': f'\033[{90 + i}m' for i, name in enumerate(base_names)})  # bright: 90-97
    colors.update(end='\033[0m', bold='\033[1m', underline='\033[4m')  # misc

    prefix = ''.join(colors[x] for x in args)
    return prefix + f'{string}' + colors['end']
def labels_to_class_weights(labels, nc=80):
    """Get class weights (inverse frequency) from training labels.

    Args:
        labels: sequence of per-image label arrays whose column 0 holds the class index
            (labels = [class xywh]); labels[0] is None when no labels were loaded.
        nc: number of classes (minimum length of the returned weight vector).

    Returns:
        torch tensor of normalized inverse-frequency weights, or an empty tensor when
        no labels were loaded.
    """
    if labels[0] is None:  # no labels loaded
        return torch.Tensor()

    labels = np.concatenate(labels, 0)  # labels.shape = (866643, 5) for COCO
    classes = labels[:, 0].astype(int)  # builtin int: the np.int alias was removed in NumPy 1.24
    weights = np.bincount(classes, minlength=nc)  # occurrences per class

    # Prepend gridpoint count (for uCE training)
    # gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum()  # gridpoints per image
    # weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5  # prepend gridpoints to start

    weights[weights == 0] = 1  # replace empty bins with 1
    weights = 1 / weights  # number of targets per class
    weights /= weights.sum()  # normalize
    return torch.from_numpy(weights)
def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
    """Produce image weights based on class_weights and image contents.

    Args:
        labels: sequence of per-image label arrays whose column 0 holds the class index.
        nc: number of classes.
        class_weights: array of nc per-class weights (read only, never modified here).

    Returns:
        1-D array with one weight per image: the sum over classes of class weight * class count.
    """
    # builtin int: the np.int alias was removed in NumPy 1.24
    class_counts = np.array([np.bincount(x[:, 0].astype(int), minlength=nc) for x in labels])
    image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1)
    # index = random.choices(range(n), weights=image_weights, k=1)  # weight image sample
    return image_weights
def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
    """Return the list of 80 COCO paper (91-index) category ids, ordered by 80-class index.

    https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
    """
    # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
    # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
    # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)]  # darknet to coco
    # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)]  # coco to darknet
    unused_ids = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83}  # ids in 1..90 that are absent from the mapping
    return [i for i in range(1, 91) if i not in unused_ids]
def xyxy2xywh(x):
    """Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right.

    Works on a torch tensor or a numpy array; the input is left untouched and a copy is returned.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    top_left, bottom_right = x[:, 0:2], x[:, 2:4]
    y[:, 0:2] = (top_left + bottom_right) / 2  # x center, y center
    y[:, 2:4] = bottom_right - top_left  # width, height
    return y
def xywh2xyxy(x):
    """Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.

    Works on a torch tensor or a numpy array; the input is left untouched and a copy is returned.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    center = x[:, 0:2]
    half_size = x[:, 2:4] / 2
    y[:, 0:2] = center - half_size  # top left x, y
    y[:, 2:4] = center + half_size  # bottom right x, y
    return y
def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
    """Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.

    x coordinates are scaled by w and shifted by padw, y coordinates by h and padh.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    center_x, center_y = x[:, 0], x[:, 1]
    half_w, half_h = x[:, 2] / 2, x[:, 3] / 2
    y[:, 0] = w * (center_x - half_w) + padw  # top left x
    y[:, 1] = h * (center_y - half_h) + padh  # top left y
    y[:, 2] = w * (center_x + half_w) + padw  # bottom right x
    y[:, 3] = h * (center_y + half_h) + padh  # bottom right y
    return y
def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
    """Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right.

    With clip=True the input boxes are first clipped IN PLACE to (h - eps, w - eps).
    """
    if clip:
        clip_coords(x, (h - eps, w - eps))  # warning: inplace clip
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    left, top, right, bottom = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
    y[:, 0] = ((left + right) / 2) / w  # x center
    y[:, 1] = ((top + bottom) / 2) / h  # y center
    y[:, 2] = (right - left) / w  # width
    y[:, 3] = (bottom - top) / h  # height
    return y
def xyn2xy(x, w=640, h=640, padw=0, padh=0):
    """Convert normalized segments into pixel segments, shape (n,2).

    Column 0 is scaled by w and shifted by padw, column 1 by h and padh.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    scale, offset = (w, h), (padw, padh)
    for axis in range(2):  # 0 -> x, 1 -> y
        y[:, axis] = scale[axis] * x[:, axis] + offset[axis]
    return y
def segment2box(segment, width=640, height=640):
    """Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy).

    Args:
        segment: (n, 2) array of xy points.
        width, height: image size; points outside [0, width] x [0, height] are ignored.

    Returns:
        Array [x_min, y_min, x_max, y_max] of the inside points, or np.zeros((1, 4)) when
        no point lies inside the image.
    """
    x, y = segment.T  # segment xy
    inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
    x, y = x[inside], y[inside]
    # Test emptiness with len(): any(x) is False for valid points whose x values are all 0
    if not len(x):
        return np.zeros((1, 4))
    return np.array([x.min(), y.min(), x.max(), y.max()])  # xyxy
def segments2boxes(segments):
    """Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).

    Each segment's bounding box is taken from the min/max of its points and the
    resulting xyxy boxes are converted with xyxy2xywh().
    """

    def _bounds(seg):
        xs, ys = seg.T  # segment xy
        return [xs.min(), ys.min(), xs.max(), ys.max()]  # xyxy

    return xyxy2xywh(np.array([_bounds(s) for s in segments]))  # xywh
def resample_segments(segments, n=1000):
    """Resample each (m,2) segment to n points by linear interpolation.

    The list is modified in place (each entry is replaced) and also returned.
    """
    for idx, seg in enumerate(segments):
        count = len(seg)
        sample_at = np.linspace(0, count - 1, n)  # fractional indices of the new points
        known_at = np.arange(count)  # indices of the existing points
        columns = [np.interp(sample_at, known_at, seg[:, axis]) for axis in range(2)]
        segments[idx] = np.concatenate(columns).reshape(2, -1).T  # segment xy
    return segments
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    """Rescale coords (xyxy) from img1_shape to img0_shape.

    coords is modified in place (padding removed, divided by the gain, clipped to
    img0_shape) and returned. When ratio_pad is given, gain is ratio_pad[0][0] and
    the padding is ratio_pad[1]; otherwise both are computed from the two shapes.
    """
    if ratio_pad is not None:
        gain, pad = ratio_pad[0][0], ratio_pad[1]
    else:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad_w = (img1_shape[1] - img0_shape[1] * gain) / 2
        pad_h = (img1_shape[0] - img0_shape[0] * gain) / 2
        pad = pad_w, pad_h  # wh padding

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords
def clip_coords(boxes, shape):
    """Clip xyxy bounding boxes in place to image shape (height, width).

    x columns (0, 2) are limited to [0, shape[1]] and y columns (1, 3) to [0, shape[0]].
    """
    height, width = shape[0], shape[1]
    if isinstance(boxes, torch.Tensor):  # faster individually
        for column, upper in enumerate((width, height, width, height)):  # x1, y1, x2, y2
            boxes[:, column].clamp_(0, upper)
        return
    # np.array (faster grouped)
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, width)  # x1, x2
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, height)  # y1, y2
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                        labels=(), max_det=300):
    """Runs Non-Maximum Suppression (NMS) on inference results

    Args:
        prediction: model output; assumed to be (batch, boxes, 5 + nc) with columns
            [x, y, w, h, obj_conf, cls_conf...] -- inferred from the indexing below, TODO confirm
        conf_thres: confidence threshold in [0, 1], applied to obj_conf and again to obj_conf * cls_conf
        iou_thres: IoU threshold in [0, 1] passed to torchvision.ops.nms
        classes: optional class indices to keep
        agnostic: when True boxes of different classes suppress each other
        multi_label: allow several labels per box (only effective when nc > 1)
        labels: optional per-image a-priori labels appended to the predictions (autolabelling)
        max_det: maximum number of detections kept per image

    Returns:
         list of detections, on (n,6) tensor per image [xyxy, conf, cls]
    """

    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    # Every image starts with an empty (0, 6) result; entries are replaced (not mutated) below
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            l = labels[xi]
            v = torch.zeros((len(l), nc + 5), device=x.device)
            v[:, :4] = l[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS
        # Offsetting boxes by class index * max_wh keeps classes apart so one nms() call handles all of them
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit}s exceeded')
            break  # time limit exceeded; remaining images keep their empty result

    return output
def strip_optimizer(f='best.pt', s=''):  # from utils.general import *; strip_optimizer()
    """Strip optimizer from 'f' to finalize training, optionally save as 's'.

    The checkpoint is loaded on CPU, its 'ema' entry (when truthy) replaces 'model',
    training-only keys are set to None, 'epoch' is set to -1, the model is converted
    to FP16 with gradients disabled, and the result is saved to s (or back to f).
    """
    ckpt = torch.load(f, map_location=torch.device('cpu'))
    if ckpt.get('ema'):
        ckpt['model'] = ckpt['ema']  # replace model with ema
    for key in ('optimizer', 'training_results', 'wandb_id', 'ema', 'updates'):  # keys
        ckpt[key] = None
    ckpt['epoch'] = -1

    model = ckpt['model']
    model.half()  # to FP16
    for param in model.parameters():
        param.requires_grad = False

    target = s or f
    torch.save(ckpt, target)
    mb = os.path.getsize(target) / 1E6  # filesize
    print(f"Optimizer stripped from {f},{(' saved as %s,' % s) if s else ''} {mb:.1f}MB")
def print_mutation(results, hyp, save_dir, bucket):
    """Log one hyperparameter-evolution result to evolve.csv, print it, and write hyp_evolve.yaml.

    Args:
        results: tuple of metric values, concatenated with the hyp values below
            (presumably 7 entries matching the metric keys -- verify against caller)
        hyp: dict of hyperparameters; keys become CSV columns, and the dict is dumped to the YAML file
        save_dir: pathlib.Path directory containing evolve.csv / hyp_evolve.yaml
        bucket: optional GCS bucket name; when truthy evolve.csv is synced with gsutil
    """
    evolve_csv, results_csv, evolve_yaml = save_dir / 'evolve.csv', save_dir / 'results.csv', save_dir / 'hyp_evolve.yaml'
    keys = ('metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
            'val/box_loss', 'val/obj_loss', 'val/cls_loss') + tuple(hyp.keys())  # [results + hyps]
    keys = tuple(x.strip() for x in keys)
    vals = results + tuple(hyp.values())
    n = len(keys)

    # Download (optional)
    if bucket:
        url = f'gs://{bucket}/evolve.csv'
        if gsutil_getsize(url) > (os.path.getsize(evolve_csv) if os.path.exists(evolve_csv) else 0):
            os.system(f'gsutil cp {url} {save_dir}')  # download evolve.csv if larger than local

    # Log to evolve.csv
    s = '' if evolve_csv.exists() else (('%20s,' * n % keys).rstrip(',') + '\n')  # add header
    with open(evolve_csv, 'a') as f:
        f.write(s + ('%20.5g,' * n % vals).rstrip(',') + '\n')

    # Print to screen
    print(colorstr('evolve: ') + ', '.join(f'{x.strip():>20s}' for x in keys))
    print(colorstr('evolve: ') + ', '.join(f'{x:20.5g}' for x in vals), end='\n\n\n')

    # Save yaml
    with open(evolve_yaml, 'w') as f:
        data = pd.read_csv(evolve_csv)
        data = data.rename(columns=lambda x: x.strip())  # strip keys
        # Row whose first 7 columns (the metric columns) give the highest fitness() value
        i = np.argmax(fitness(data.values[:, :7]))  #
        f.write('# YOLOv5 Hyperparameter Evolution Results\n' +
                f'# Best generation: {i}\n' +
                f'# Last generation: {len(data)}\n' +
                '# ' + ', '.join(f'{x.strip():>20s}' for x in keys[:7]) + '\n' +
                '# ' + ', '.join(f'{x:>20.5g}' for x in data.values[i, :7]) + '\n\n')
        yaml.safe_dump(hyp, f, sort_keys=False)

    if bucket:
        os.system(f'gsutil cp {evolve_csv} {evolve_yaml} gs://{bucket}')  # upload
def apply_classifier(x, model, img, im0):
    """Apply a second stage classifier to yolo outputs.

    Args:
        x: list of per-image detection tensors (columns 0-3 boxes, column 5 class); modified in place
        model: classifier called on a batch of 224x224 RGB crops scaled to 0-1
        img: network input batch; only img.shape[2:] is used, for rescaling boxes
        im0: original image (np.ndarray) or list of original images

    Returns:
        x, where each image keeps only detections whose class equals the classifier's argmax.
    """
    im0 = [im0] if isinstance(im0, np.ndarray) else im0
    for i, d in enumerate(x):  # per image
        if d is not None and len(d):
            d = d.clone()  # work on a copy so the boxes in x[i] are not altered

            # Reshape and pad cutouts
            b = xyxy2xywh(d[:, :4])  # boxes
            b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # rectangle to square
            b[:, 2:] = b[:, 2:] * 1.3 + 30  # pad
            d[:, :4] = xywh2xyxy(b).long()

            # Rescale boxes from img_size to im0 size
            scale_coords(img.shape[2:], d[:, :4], im0[i].shape)

            # Classes
            pred_cls1 = d[:, 5].long()
            ims = []
            for j, a in enumerate(d):  # per item
                cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])]
                im = cv2.resize(cutout, (224, 224))  # BGR
                # cv2.imwrite('example%i.jpg' % j, cutout)

                im = im[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x224x224
                im = np.ascontiguousarray(im, dtype=np.float32)  # uint8 to float32
                im /= 255.0  # 0 - 255 to 0.0 - 1.0
                ims.append(im)

            pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1)  # classifier prediction
            x[i] = x[i][pred_cls1 == pred_cls2]  # retain matching class detections

    return x
def save_one_box(xyxy, im, file='image.jpg', gain=1.02, pad=10, square=False, BGR=False, save=True):
    """Crop one box out of ``im``, optionally save it as a .jpg, and return the crop.

    Arguments:
        xyxy: box corners (x1, y1, x2, y2); converted to a tensor and viewed as (-1, 4).
        im: image array indexed as [row, column, channel].
        file: target path; passed through ``increment_path`` and forced to a ``.jpg`` suffix.
        gain: multiplier applied to the box width/height.
        pad: pixels added to the box width/height after ``gain``.
        square: if True, first grow the shorter side to match the longer one.
        BGR: if True keep the channel order, otherwise reverse it.
        save: if True write the crop to disk.

    Returns:
        The cropped image region.
    """
    box = xyxy2xywh(torch.tensor(xyxy).view(-1, 4))  # boxes as centre/width/height
    if square:
        box[:, 2:] = box[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
    box[:, 2:] = box[:, 2:] * gain + pad  # box wh * gain + pad
    corners = xywh2xyxy(box).long()
    clip_coords(corners, im.shape)

    x1, y1, x2, y2 = (int(v) for v in corners[0])
    channel_step = 1 if BGR else -1
    crop = im[y1:y2, x1:x2, ::channel_step]

    if save:
        target = increment_path(file, mkdir=True).with_suffix('.jpg')
        cv2.imwrite(str(target), crop)
    return crop
def increment_path(path, exist_ok=False, sep='', mkdir=False):
    """Increment a file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc.

    Arguments:
        path: file or directory path (str or Path).
        exist_ok: if True, return ``path`` unchanged even when it already exists.
        sep: separator inserted between the base name and the index.
        mkdir: if True, create the resulting directory (or the parent directory when the
            result has a file suffix).

    Returns:
        Path: ``path`` itself when it is free (or ``exist_ok``), otherwise the next free
        numbered sibling, starting at 2.
    """
    path = Path(path)  # os-agnostic
    if path.exists() and not exist_ok:
        suffix = path.suffix
        path = path.with_suffix('')
        base = f"{path.name}{sep}"
        # Escape glob metacharacters so names such as "exp[1]" are treated literally
        candidates = glob.glob(glob.escape(f"{path}{sep}") + "*")  # similar paths
        # Match only the final path component, anchored at its start and with the base name
        # escaped: a parent directory like "exp5/" or a regex metacharacter in the name
        # must not influence the index.
        pattern = re.compile(re.escape(base) + r"(\d+)")
        matches = (pattern.match(Path(c).name) for c in candidates)
        indices = [int(m.group(1)) for m in matches if m]
        n = max(indices) + 1 if indices else 2  # increment number
        path = Path(f"{path}{sep}{n}{suffix}")  # update path
    directory = path if path.suffix == '' else path.parent  # a suffix means "file": use its parent
    if mkdir and not directory.exists():
        directory.mkdir(parents=True, exist_ok=True)  # make directory
    return path
FROM gcr.io/google-appengine/python
# Create a virtualenv for dependencies. This isolates these packages from
# system-level packages.
# Use -p python3 or -p python3.7 to select python version. Default is version 2.
RUN virtualenv /env -p python3
# Setting these environment variables is the same as running
# source /env/bin/activate.
ENV VIRTUAL_ENV /env
ENV PATH /env/bin:$PATH
# Install OpenCV from the distribution package repository.
# NOTE(review): this is a system-level package while the app runs from the /env
# virtualenv created above -- confirm the module is actually importable there.
RUN apt-get update && apt-get install -y python-opencv
# Copy the application's requirements.txt and run pip to install all
# dependencies into the virtualenv.
ADD requirements.txt /app/requirements.txt
RUN pip install -r /app/requirements.txt
# Add the application source code.
ADD . /app
# Run a WSGI server to serve the application. gunicorn must be declared as
# a dependency in requirements.txt.
CMD gunicorn -b :$PORT main:app
# add these requirements in your app on top of the existing ones
pip==19.2
Flask==1.0.2
gunicorn==19.9.0
runtime: custom
env: flex
service: yolov5app
liveness_check:
initial_delay_sec: 600
manual_scaling:
instances: 1
resources:
cpu: 1
memory_gb: 4
disk_size_gb: 20
\ No newline at end of file
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Logging utils
"""
import os
import warnings
from threading import Thread
import pkg_resources as pkg
import torch
from torch.utils.tensorboard import SummaryWriter
from utils.general import colorstr, emojis
from utils.loggers.wandb.wandb_utils import WandbLogger
from utils.plots import plot_images, plot_results
from utils.torch_utils import de_parallel
# Names of the supported logging backends; also used as attribute names on Loggers
LOGGERS = ('csv', 'tb', 'wandb')  # text-file, TensorBoard, Weights & Biases
# Process rank read from the RANK environment variable; -1 when the variable is unset
RANK = int(os.getenv('RANK', -1))

# Optional dependency: `wandb` is left as None when the package is missing, when the
# imported name has no __version__ (see the assert below), or when login fails.
try:
    import wandb

    assert hasattr(wandb, '__version__')  # verify package import not local dir
    # Login is only attempted for wandb >= 0.12.2 and only when RANK is 0 or -1
    if pkg.parse_version(wandb.__version__) >= pkg.parse_version('0.12.2') and RANK in [0, -1]:
        wandb_login_success = wandb.login(timeout=30)
        if not wandb_login_success:
            wandb = None  # treat a failed login like a missing package
except (ImportError, AssertionError):
    wandb = None
class Loggers():
    """YOLOv5 Loggers class.

    Dispatches training callbacks to the enabled backends: a results.csv file (always on),
    TensorBoard (``self.tb``) and Weights & Biases (``self.wandb``). A backend attribute
    is None when that backend is disabled.
    """

    def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS):
        """Store the run configuration and create the enabled logger backends.

        Arguments:
            save_dir: run directory (used with ``/`` and ``.glob``, i.e. a Path-like object).
            weights: checkpoint path, read for its 'wandb_id' when resuming locally.
            opt: run options; ``evolve``, ``resume`` and ``save_period`` are read here or later.
            hyp: hyperparameters, attached to ``opt.hyp`` for W&B.
            logger: console logger used for the TensorBoard start message.
            include: names of the backends to enable.
        """
        self.save_dir = save_dir
        self.weights = weights
        self.opt = opt
        self.hyp = hyp
        self.logger = logger  # for printing results to console
        self.include = include
        self.keys = ['train/box_loss', 'train/obj_loss', 'train/cls_loss',  # train loss
                     'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',  # metrics
                     'val/box_loss', 'val/obj_loss', 'val/cls_loss',  # val loss
                     'x/lr0', 'x/lr1', 'x/lr2']  # params
        for k in LOGGERS:
            setattr(self, k, None)  # init empty logger dictionary
        self.csv = True  # always log to csv

        # Message
        if not wandb:
            prefix = colorstr('Weights & Biases: ')
            s = f"{prefix}run 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs (RECOMMENDED)"
            print(emojis(s))

        # TensorBoard
        s = self.save_dir
        if 'tb' in self.include and not self.opt.evolve:
            prefix = colorstr('TensorBoard: ')
            self.logger.info(f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/")
            self.tb = SummaryWriter(str(s))

        # W&B
        if wandb and 'wandb' in self.include:
            wandb_artifact_resume = isinstance(self.opt.resume, str) and self.opt.resume.startswith('wandb-artifact://')
            # On a local resume, continue the W&B run whose id is stored in the checkpoint
            run_id = torch.load(self.weights).get('wandb_id') if self.opt.resume and not wandb_artifact_resume else None
            self.opt.hyp = self.hyp  # add hyperparameters
            self.wandb = WandbLogger(self.opt, run_id)
        else:
            self.wandb = None

    def on_pretrain_routine_end(self):
        """Callback runs on pre-train routine end: log the label plots to W&B."""
        paths = self.save_dir.glob('*labels*.jpg')  # training labels
        if self.wandb:
            self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]})

    def on_train_batch_end(self, ni, model, imgs, targets, paths, plots, sync_bn):
        """Callback runs on train batch end: log the model graph and the first training mosaics.

        Arguments:
            ni: batch counter compared against 0, 3 and 10 below.
            model, imgs, targets, paths: current model and batch, used for graph tracing and plotting.
            plots: if False nothing is logged.
            sync_bn: if True the TensorBoard graph is skipped.
        """
        if plots:
            if ni == 0:
                # TensorBoard may be disabled (self.tb is None), so guard before tracing.
                # tb.add_graph() --sync known issue https://github.com/ultralytics/yolov5/issues/3754
                if self.tb and not sync_bn:
                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore')  # suppress jit trace warning
                        self.tb.add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), [])
            if ni < 3:
                f = self.save_dir / f'train_batch{ni}.jpg'  # filename
                Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
            if self.wandb and ni == 10:
                files = sorted(self.save_dir.glob('train*.jpg'))
                self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]})

    def on_train_epoch_end(self, epoch):
        """Callback runs on train epoch end: advance the W&B epoch counter."""
        if self.wandb:
            self.wandb.current_epoch = epoch + 1

    def on_val_image_end(self, pred, predn, path, names, im):
        """Callback runs on val image end: forward one validated image to W&B."""
        if self.wandb:
            self.wandb.val_one_image(pred, predn, path, names, im)

    def on_val_end(self):
        """Callback runs on val end: log the validation plots to W&B."""
        if self.wandb:
            files = sorted(self.save_dir.glob('val*.jpg'))
            self.wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in files]})

    def on_fit_epoch_end(self, vals, epoch, best_fitness, fi):
        """Callback runs at the end of each fit (train+val) epoch: write metrics to every backend.

        Arguments:
            vals: list of metric values in the order of ``self.keys``.
            epoch: epoch number written as the first CSV column and used as the TensorBoard step.
            best_fitness, fi: best and current fitness; equality marks the best epoch for W&B.
        """
        x = dict(zip(self.keys, vals))  # metric name -> value
        if self.csv:
            file = self.save_dir / 'results.csv'
            n = len(x) + 1  # number of cols
            s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n')  # add header
            with open(file, 'a') as f:
                f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n')

        if self.tb:
            for k, v in x.items():
                self.tb.add_scalar(k, v, epoch)

        if self.wandb:
            self.wandb.log(x)
            self.wandb.end_epoch(best_result=best_fitness == fi)

    def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
        """Callback runs on model save event: upload the checkpoint to W&B every ``save_period`` epochs."""
        if self.wandb:
            if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1:
                self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)

    def on_train_end(self, last, best, plots, epoch):
        """Callback runs on training end: publish result plots and the final model, then close W&B.

        Arguments:
            last, best: paths of the last and best checkpoints.
            plots: if True, results.png is generated from results.csv first.
            epoch: step used for the TensorBoard images.
        """
        if plots:
            plot_results(file=self.save_dir / 'results.csv')  # save results.png
        files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]]
        files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()]  # filter

        if self.tb:
            import cv2
            for f in files:
                self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC')

        if self.wandb:
            self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]})
            # Calling wandb.log. TODO: Refactor this into WandbLogger.log_model
            if not self.opt.evolve:
                wandb.log_artifact(str(best if best.exists() else last), type='model',
                                   name='run_' + self.wandb.wandb_run.id + '_model',
                                   aliases=['latest', 'best', 'stripped'])
                self.wandb.finish_run()
            else:
                # When evolving, finish this run and open a fresh logger for the next one
                self.wandb.finish_run()
                self.wandb = WandbLogger(self.opt)
📚 This guide explains how to use **Weights & Biases** (W&B) with YOLOv5 🚀. UPDATED 29 September 2021.
* [About Weights & Biases](#about-weights--biases)
* [First-Time Setup](#first-time-setup)
* [Viewing runs](#viewing-runs)
* [Advanced Usage: Dataset Versioning and Evaluation](#advanced-usage)
* [Reports: Share your work with the world!](#reports)
## About Weights & Biases
Think of [W&B](https://wandb.ai/site?utm_campaign=repo_yolo_wandbtutorial) like GitHub for machine learning models. With a few lines of code, save everything you need to debug, compare and reproduce your models — architecture, hyperparameters, git commits, model weights, GPU usage, and even datasets and predictions.
Used by top researchers including teams at OpenAI, Lyft, Github, and MILA, W&B is part of the new standard of best practices for machine learning. How W&B can help you optimize your machine learning workflows:
* [Debug](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#Free-2) model performance in real time
* [GPU usage](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#System-4) visualized automatically
* [Custom charts](https://wandb.ai/wandb/customizable-charts/reports/Powerful-Custom-Charts-To-Debug-Model-Peformance--VmlldzoyNzY4ODI) for powerful, extensible visualization
* [Share insights](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#Share-8) interactively with collaborators
* [Optimize hyperparameters](https://docs.wandb.com/sweeps) efficiently
* [Track](https://docs.wandb.com/artifacts) datasets, pipelines, and production models
## First-Time Setup
<details open>
<summary> Toggle Details </summary>
When you first train, W&B will prompt you to create a new account and will generate an **API key** for you. If you are an existing user you can retrieve your key from https://wandb.ai/authorize. This key is used to tell W&B where to log your data. You only need to supply your key once, and then it is remembered on the same device.
W&B will create a cloud **project** (default is 'YOLOv5') for your training runs, and each new training run will be provided a unique run **name** within that project as project/name. You can also manually set your project and run name as:
```shell
$ python train.py --project ... --name ...
```
YOLOv5 notebook example: <a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a> <a href="https://www.kaggle.com/ultralytics/yolov5"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Open In Kaggle"></a>
<img width="960" alt="Screen Shot 2021-09-29 at 10 23 13 PM" src="https://user-images.githubusercontent.com/26833433/135392431-1ab7920a-c49d-450a-b0b0-0c86ec86100e.png">
</details>
## Viewing Runs
<details open>
<summary> Toggle Details </summary>
Run information streams from your environment to the W&B cloud console as you train. This allows you to monitor and even cancel runs in <b>realtime</b> . All important information is logged:
* Training & Validation losses
* Metrics: Precision, Recall, mAP@0.5, mAP@0.5:0.95
* Learning Rate over time
* A bounding box debugging panel, showing the training progress over time
* GPU: Type, **GPU Utilization**, power, temperature, **CUDA memory usage**
* System: Disk I/O, CPU utilization, RAM memory usage
* Your trained model as W&B Artifact
* Environment: OS and Python types, Git repository and state, **training command**
<p align="center"><img width="900" alt="Weights & Biases dashboard" src="https://user-images.githubusercontent.com/26833433/135390767-c28b050f-8455-4004-adb0-3b730386e2b2.png"></p>
</details>
## Advanced Usage
You can leverage W&B artifacts and Tables integration to easily visualize and manage your datasets, models and training evaluations. Here are some quick examples to get you started.
<details open>
<h3>1. Visualize and Version Datasets</h3>
Log, visualize, dynamically query, and understand your data with <a href='https://docs.wandb.ai/guides/data-vis/tables'>W&B Tables</a>. You can use the following command to log your dataset as a W&B Table. This will generate a <code>{dataset}_wandb.yaml</code> file which can be used to train from dataset artifact.
<details>
<summary> <b>Usage</b> </summary>
<b>Code</b> <code> $ python utils/loggers/wandb/log_dataset.py --project ... --name ... --data .. </code>
![Screenshot (64)](https://user-images.githubusercontent.com/15766192/128486078-d8433890-98a3-4d12-8986-b6c0e3fc64b9.png)
</details>
<h3> 2: Train and Log Evaluation simultaneously </h3>
This is an extension of the previous section, but it will also start training after uploading the dataset. <b> This also logs an evaluation Table</b>
Evaluation table compares your predictions and ground truths across the validation set for each epoch. It uses the references to the already uploaded datasets,
so no images will be uploaded from your system more than once.
<details>
<summary> <b>Usage</b> </summary>
<b>Code</b> <code> $ python utils/loggers/wandb/log_dataset.py --data .. --upload_data </code>
![Screenshot (72)](https://user-images.githubusercontent.com/15766192/128979739-4cf63aeb-a76f-483f-8861-1c0100b938a5.png)
</details>
<h3> 3: Train using dataset artifact </h3>
When you upload a dataset as described in the first section, you get a new config file with an added `_wandb` to its name. This file contains the information that
can be used to train a model directly from the dataset artifact. <b> This also logs evaluation </b>
<details>
<summary> <b>Usage</b> </summary>
<b>Code</b> <code> $ python utils/loggers/wandb/log_dataset.py --data {data}_wandb.yaml </code>
![Screenshot (72)](https://user-images.githubusercontent.com/15766192/128979739-4cf63aeb-a76f-483f-8861-1c0100b938a5.png)
</details>
<h3> 4: Save model checkpoints as artifacts </h3>
To enable saving and versioning checkpoints of your experiment, pass `--save_period n` with the base command, where `n` represents the checkpoint interval.
You can also log both the dataset and model checkpoints simultaneously. If not passed, only the final model will be logged.
<details>
<summary> <b>Usage</b> </summary>
<b>Code</b> <code> $ python train.py --save_period 1 </code>
![Screenshot (68)](https://user-images.githubusercontent.com/15766192/128726138-ec6c1f60-639d-437d-b4ee-3acd9de47ef3.png)
</details>
</details>
<h3> 5: Resume runs from checkpoint artifacts. </h3>
Any run can be resumed using artifacts if the <code>--resume</code> argument starts with <code>wandb-artifact://</code> prefix followed by the run path, i.e, <code>wandb-artifact://username/project/runid </code>. This doesn't require the model checkpoint to be present on the local system.
<details>
<summary> <b>Usage</b> </summary>
<b>Code</b> <code> $ python train.py --resume wandb-artifact://{run_path} </code>
![Screenshot (70)](https://user-images.githubusercontent.com/15766192/128728988-4e84b355-6c87-41ae-a591-14aecf45343e.png)
</details>
<h3> 6: Resume runs from dataset artifact & checkpoint artifacts. </h3>
<b> Local dataset or model checkpoints are not required. This can be used to resume runs directly on a different device </b>
The syntax is the same as the previous section, but you'll need to log both the dataset and model checkpoints as artifacts, i.e., either set <code>--upload_dataset</code> or
train from a <code>_wandb.yaml</code> file, and also set <code>--save_period</code>.
<details>
<summary> <b>Usage</b> </summary>
<b>Code</b> <code> $ python train.py --resume wandb-artifact://{run_path} </code>
![Screenshot (70)](https://user-images.githubusercontent.com/15766192/128728988-4e84b355-6c87-41ae-a591-14aecf45343e.png)
</details>
</details>
<h3> Reports </h3>
W&B Reports can be created from your saved runs for sharing online. Once a report is created you will receive a link you can use to publicly share your results. Here is an example report created from the COCO128 tutorial trainings of all four YOLOv5 models ([link](https://wandb.ai/glenn-jocher/yolov5_tutorial/reports/YOLOv5-COCO128-Tutorial-Results--VmlldzozMDI5OTY)).
<img width="900" alt="Weights & Biases Reports" src="https://user-images.githubusercontent.com/26833433/135394029-a17eaf86-c6c1-4b1d-bb80-b90e83aaffa7.png">
## Environments
YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):
- **Google Colab and Kaggle** notebooks with free GPU: <a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a> <a href="https://www.kaggle.com/ultralytics/yolov5"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Open In Kaggle"></a>
- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)
- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/AWS-Quickstart)
- **Docker Image**. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) <a href="https://hub.docker.com/r/ultralytics/yolov5"><img src="https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker" alt="Docker Pulls"></a>
## Status
![CI CPU testing](https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg)
If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 training ([train.py](https://github.com/ultralytics/yolov5/blob/master/train.py)), validation ([val.py](https://github.com/ultralytics/yolov5/blob/master/val.py)), inference ([detect.py](https://github.com/ultralytics/yolov5/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov5/blob/master/export.py)) on MacOS, Windows, and Ubuntu every 24 hours and on every commit.
import argparse
from wandb_utils import WandbLogger
WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'
def create_dataset_artifact(opt):
    """Create a WandbLogger with job type 'Dataset Creation' for the dataset in ``opt``.

    Arguments:
        opt (namespace) -- parsed command-line options (see the argument parser in this script)

    Returns:
        The WandbLogger instance that was created. Earlier versions bound it to an unused
        local and returned None; callers that ignore the return value are unaffected.
    """
    return WandbLogger(opt, None, job_type='Dataset Creation')
if __name__ == '__main__':
    # Command-line options as (flag, add_argument keyword arguments), in declaration order
    cli_options = (
        ('--data', dict(type=str, default='data/coco128.yaml', help='data.yaml path')),
        ('--single-cls', dict(action='store_true', help='train as single-class dataset')),
        ('--project', dict(type=str, default='YOLOv5', help='name of W&B Project')),
        ('--entity', dict(default=None, help='W&B entity')),
        ('--name', dict(type=str, default='log dataset', help='name of W&B run')),
    )
    parser = argparse.ArgumentParser()
    for flag, kwargs in cli_options:
        parser.add_argument(flag, **kwargs)

    opt = parser.parse_args()
    opt.resume = False  # Explicitly disallow resume check for dataset upload job
    create_dataset_artifact(opt)
import sys
from pathlib import Path
import wandb
# Make the repository root importable so the `from train import ...` / `from utils...`
# imports that follow resolve when this file is executed directly as a script.
FILE = Path(__file__).resolve()  # absolute path of this file
ROOT = FILE.parents[3]  # YOLOv5 root directory (three directory levels above this file's folder)
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
from train import train, parse_opt
from utils.general import increment_path
from utils.torch_utils import select_device
from utils.callbacks import Callbacks
def sweep():
    """Run one training trial with the hyperparameters supplied by the active W&B sweep.

    Initialises a W&B run, reads the sweep's config items, copies batch size, epochs and
    data path from them onto the parsed training options, disables checkpoint saving,
    and calls ``train`` with the config items as the hyperparameter dictionary.
    """
    wandb.init()

    # Get hyp dict from sweep agent
    sweep_params = vars(wandb.config).get("_items")

    # Workaround: get necessary opt args
    opt = parse_opt(known=True)
    for key in ("batch_size", "epochs", "data"):
        setattr(opt, key, sweep_params.get(key))
    opt.nosave = True
    run_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)
    opt.save_dir = str(run_dir)

    device = select_device(opt.device, batch_size=opt.batch_size)

    # train
    train(sweep_params, opt, device, callbacks=Callbacks())
if __name__ == "__main__":
    # Script entry point: run a single sweep trial when this file is executed directly
    sweep()
# Hyperparameters for training
# To set range-
# Provide min and max values as:
# parameter:
#
# min: scalar
# max: scalar
# OR
#
# Set a specific list of search space-
# parameter:
# values: [scalar1, scalar2, scalar3...]
#
# You can use grid, bayesian and hyperopt search strategy
# For more info on configuring sweeps visit - https://docs.wandb.ai/guides/sweeps/configuration
program: utils/loggers/wandb/sweep.py
method: random
metric:
name: metrics/mAP_0.5
goal: maximize
parameters:
# hyperparameters: set either min, max range or values list
data:
value: "data/coco128.yaml"
batch_size:
values: [64]
epochs:
values: [10]
lr0:
distribution: uniform
min: 1e-5
max: 1e-1
lrf:
distribution: uniform
min: 0.01
max: 1.0
momentum:
distribution: uniform
min: 0.6
max: 0.98
weight_decay:
distribution: uniform
min: 0.0
max: 0.001
warmup_epochs:
distribution: uniform
min: 0.0
max: 5.0
warmup_momentum:
distribution: uniform
min: 0.0
max: 0.95
warmup_bias_lr:
distribution: uniform
min: 0.0
max: 0.2
box:
distribution: uniform
min: 0.02
max: 0.2
cls:
distribution: uniform
min: 0.2
max: 4.0
cls_pw:
distribution: uniform
min: 0.5
max: 2.0
obj:
distribution: uniform
min: 0.2
max: 4.0
obj_pw:
distribution: uniform
min: 0.5
max: 2.0
iou_t:
distribution: uniform
min: 0.1
max: 0.7
anchor_t:
distribution: uniform
min: 2.0
max: 8.0
fl_gamma:
distribution: uniform
min: 0.0
max: 0.1
hsv_h:
distribution: uniform
min: 0.0
max: 0.1
hsv_s:
distribution: uniform
min: 0.0
max: 0.9
hsv_v:
distribution: uniform
min: 0.0
max: 0.9
degrees:
distribution: uniform
min: 0.0
max: 45.0
translate:
distribution: uniform
min: 0.0
max: 0.9
scale:
distribution: uniform
min: 0.0
max: 0.9
shear:
distribution: uniform
min: 0.0
max: 10.0
perspective:
distribution: uniform
min: 0.0
max: 0.001
flipud:
distribution: uniform
min: 0.0
max: 1.0
fliplr:
distribution: uniform
min: 0.0
max: 1.0
mosaic:
distribution: uniform
min: 0.0
max: 1.0
mixup:
distribution: uniform
min: 0.0
max: 1.0
copy_paste:
distribution: uniform
min: 0.0
max: 1.0
"""Utilities and tools for tracking runs with Weights & Biases."""
import logging
import os
import sys
from contextlib import contextmanager
from pathlib import Path
import pkg_resources as pkg
import yaml
from tqdm import tqdm
# Make the repository root importable so the `from utils...` imports that follow
# resolve even when this module is loaded from inside its own directory.
FILE = Path(__file__).resolve()  # absolute path of this file
ROOT = FILE.parents[3]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
from utils.datasets import LoadImagesAndLabels
from utils.datasets import img2label_paths
from utils.general import check_dataset, check_file
# Optional dependency: `wandb` is set to None when the package is missing or when the
# imported name has no __version__ attribute (see the assert below).
try:
    import wandb

    assert hasattr(wandb, '__version__')  # verify package import not local dir
except (ImportError, AssertionError):
    wandb = None

# Process rank read from the RANK environment variable; -1 when the variable is unset
RANK = int(os.getenv('RANK', -1))
WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'  # marks a path/URI as a reference to a W&B artifact


def remove_prefix(from_string, prefix=WANDB_ARTIFACT_PREFIX):
    """Return ``from_string`` without a leading ``prefix``.

    The string is returned unchanged when it does not start with ``prefix``; previously
    the first ``len(prefix)`` characters were dropped unconditionally, which silently
    corrupted strings that did not carry the prefix.

    Arguments:
        from_string (str) -- string to strip
        prefix (str) -- prefix to remove, defaults to WANDB_ARTIFACT_PREFIX
    """
    if prefix and from_string.startswith(prefix):
        return from_string[len(prefix):]
    return from_string
def check_wandb_config_file(data_config_file):
    """Prefer the ``_wandb`` variant of a data config file when it exists.

    ``name.ext`` is mapped to ``name_wandb.ext`` (only the last dot is considered).

    Arguments:
        data_config_file (str) -- path of the data config file

    Returns:
        The ``_wandb`` path if that file exists, otherwise ``data_config_file`` unchanged.
    """
    pieces = data_config_file.rsplit('.', 1)  # [root, extension], or [whole string] without a dot
    candidate = '_wandb.'.join(pieces)  # updated data.yaml path
    return candidate if Path(candidate).is_file() else data_config_file
def check_wandb_dataset(data_file):
    """Load a dataset config, bypassing the local dataset check for W&B artifact datasets.

    Arguments:
        data_file (str) -- path of the dataset config file

    Returns:
        The parsed YAML dict when the file resolves via ``check_file``, ends with '.yaml'
        and its 'train' or 'val' entry is a string starting with WANDB_ARTIFACT_PREFIX;
        otherwise the result of ``check_dataset(data_file)``.
    """
    if check_file(data_file) and data_file.endswith('.yaml'):
        with open(data_file, errors='ignore') as f:
            data_dict = yaml.safe_load(f)
        splits = (data_dict['train'], data_dict['val'])  # both keys are required
        if any(isinstance(entry, str) and entry.startswith(WANDB_ARTIFACT_PREFIX) for entry in splits):
            return data_dict
    return check_dataset(data_file)
def get_run_info(run_path):
    """Split a ``wandb-artifact://entity/project/run_id`` reference into its components.

    Path components are taken with ``.name`` rather than ``.stem`` so that an entity,
    project or run id containing a dot (e.g. ``my.project``) is not truncated at the dot.

    Arguments:
        run_path (str) -- run reference starting with WANDB_ARTIFACT_PREFIX

    Returns:
        (entity, project, run_id, model_artifact_name), where the artifact name is
        ``'run_' + run_id + '_model'``.
    """
    run_path = Path(remove_prefix(run_path, WANDB_ARTIFACT_PREFIX))
    run_id = run_path.name
    project = run_path.parent.name
    entity = run_path.parent.parent.name
    model_artifact_name = 'run_' + run_id + '_model'
    return entity, project, run_id, model_artifact_name
def check_wandb_resume(opt):
    """Report whether ``opt.resume`` refers to a W&B artifact, preparing ranks other than -1/0.

    On ranks other than -1 and 0, the dataset config is first processed by
    ``process_wandb_config_ddp_mode``; if the run is resumed from an artifact, the
    latest model artifact is downloaded there and ``opt.weights`` is pointed at its
    ``last.pt``.

    Arguments:
        opt (namespace) -- command-line options of the run

    Returns:
        True when ``opt.resume`` is a string starting with WANDB_ARTIFACT_PREFIX, else None.
    """
    is_worker_rank = RANK not in [-1, 0]
    if is_worker_rank:
        process_wandb_config_ddp_mode(opt)

    resume_from_artifact = isinstance(opt.resume, str) and opt.resume.startswith(WANDB_ARTIFACT_PREFIX)
    if not resume_from_artifact:
        return None

    if is_worker_rank:  # For resuming DDP runs
        entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
        artifact_ref = entity + '/' + project + '/' + model_artifact_name + ':latest'
        modeldir = wandb.Api().artifact(artifact_ref).download()
        opt.weights = str(Path(modeldir) / "last.pt")
    return True
def process_wandb_config_ddp_mode(opt):
    """Download W&B dataset artifacts referenced by ``opt.data`` and switch to a local config.

    For each of the 'train' and 'val' entries that is a string starting with
    WANDB_ARTIFACT_PREFIX, the artifact (at alias ``opt.artifact_alias``) is downloaded
    and the entry is replaced by ``<download dir>/data/images/``. If anything was
    downloaded, the rewritten config is saved as ``wandb_local_data.yaml`` and
    ``opt.data`` is pointed at it.

    The rewritten config is stored in the val download directory when there is one and
    in the train download directory otherwise; previously a config with only an
    artifact train split raised TypeError on ``Path(None)``.

    Arguments:
        opt (namespace) -- command-line options; ``data`` and ``artifact_alias`` are read,
            ``data`` may be overwritten
    """
    with open(check_file(opt.data), errors='ignore') as f:
        data_dict = yaml.safe_load(f)  # data dict

    download_dirs = {}
    for split in ('train', 'val'):
        entry = data_dict[split]
        if isinstance(entry, str) and entry.startswith(WANDB_ARTIFACT_PREFIX):
            api = wandb.Api()
            artifact = api.artifact(remove_prefix(entry) + ':' + opt.artifact_alias)
            download_dirs[split] = artifact.download()
            data_dict[split] = str(Path(download_dirs[split]) / 'data/images/')

    train_dir, val_dir = download_dirs.get('train'), download_dirs.get('val')
    if train_dir or val_dir:
        # Fall back to the train directory when no val artifact was downloaded
        ddp_data_path = str(Path(val_dir or train_dir) / 'wandb_local_data.yaml')
        with open(ddp_data_path, 'w') as f:
            yaml.safe_dump(data_dict, f)
        opt.data = ddp_data_path
class WandbLogger():
"""Log training runs, datasets, models, and predictions to Weights & Biases.
This logger sends information to W&B at wandb.ai. By default, this information
includes hyperparameters, system configuration and metrics, model metrics,
and basic data metrics and analyses.
By providing additional command line arguments to train.py, datasets,
models and predictions can also be logged.
For more on how this logger is used, see the Weights & Biases documentation:
https://docs.wandb.com/guides/integrations/yolov5
"""
def __init__(self, opt, run_id=None, job_type='Training'):
    """
    - Initialize WandbLogger instance
    - Upload dataset if opt.upload_dataset is True
    - Setup training processes if job_type is 'Training'

    arguments:
    opt (namespace) -- Commandline arguments for this run
    run_id (str) -- Run ID of W&B run to be resumed
    job_type (str) -- To set the job_type for this run

    """
    # Pre-training routine --
    self.job_type = job_type
    # self.wandb is the module (or None when unavailable); self.wandb_run is the already-active run, if any
    self.wandb, self.wandb_run = wandb, None if not wandb else wandb.run
    self.val_artifact, self.train_artifact = None, None
    self.train_artifact_path, self.val_artifact_path = None, None
    self.result_artifact = None
    self.val_table, self.result_table = None, None
    self.bbox_media_panel_images = []
    self.val_table_path_map = None
    self.max_imgs_to_log = 16
    self.wandb_artifact_data_dict = None
    self.data_dict = None
    # It's more elegant to stick to 1 wandb.init call, but useful config data is overwritten in the WandbLogger's wandb.init call
    if isinstance(opt.resume, str):  # checks resume from artifact
        if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
            # NOTE: the run_id argument is overwritten by the id parsed from opt.resume
            entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
            model_artifact_name = WANDB_ARTIFACT_PREFIX + model_artifact_name
            assert wandb, 'install wandb to resume wandb runs'
            # Resume wandb-artifact:// runs here| workaround for not overwriting wandb.config
            self.wandb_run = wandb.init(id=run_id,
                                        project=project,
                                        entity=entity,
                                        resume='allow',
                                        allow_val_change=True)
            # From here on opt.resume names the model artifact rather than the run path
            opt.resume = model_artifact_name
    elif self.wandb:
        # Start a new run unless one is already active, in which case it is reused
        self.wandb_run = wandb.init(config=opt,
                                    resume="allow",
                                    project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
                                    entity=opt.entity,
                                    name=opt.name if opt.name != 'exp' else None,
                                    job_type=job_type,
                                    id=run_id,
                                    allow_val_change=True) if not wandb.run else wandb.run
    if self.wandb_run:
        if self.job_type == 'Training':
            if opt.upload_dataset:
                if not opt.resume:
                    self.wandb_artifact_data_dict = self.check_and_upload_dataset(opt)

            if opt.resume:
                # resume from artifact
                if isinstance(opt.resume, str) and opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
                    # data_dict was stored in the run config by the original (non-resumed) run
                    self.data_dict = dict(self.wandb_run.config.data_dict)
                else:  # local resume
                    self.data_dict = check_wandb_dataset(opt.data)
            else:
                self.data_dict = check_wandb_dataset(opt.data)
                # Prefer the dict of the freshly uploaded dataset, if any
                self.wandb_artifact_data_dict = self.wandb_artifact_data_dict or self.data_dict

                # write data_dict to config. useful for resuming from artifacts. Do this only when not resuming.
                self.wandb_run.config.update({'data_dict': self.wandb_artifact_data_dict},
                                             allow_val_change=True)
            self.setup_training(opt)

        if self.job_type == 'Dataset Creation':
            self.data_dict = self.check_and_upload_dataset(opt)
def check_and_upload_dataset(self, opt):
    """
    Upload the dataset described by opt.data as a W&B artifact and load the generated config

    arguments:
    opt (namespace) -- Commandline arguments for current run

    returns:
    Dataset info dictionary parsed from the config file written by log_dataset_artifact,
    in which local dataset paths are replaced by WANDB_ARTIFACT_PREFIX links.
    """
    assert wandb, 'Install wandb to upload dataset'
    project_name = 'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem
    config_path = self.log_dataset_artifact(opt.data, opt.single_cls, project_name)
    print("Created dataset config file ", config_path)
    with open(config_path, errors='ignore') as f:
        return yaml.safe_load(f)
def setup_training(self, opt):
    """
    Setup the necessary processes for training YOLO models:
      - Attempt to download model checkpoint and dataset artifacts if opt.resume starts with WANDB_ARTIFACT_PREFIX
      - Update data_dict, to contain info of previous run if resumed and the paths of dataset artifact if downloaded
      - Setup log_dict, initialize bbox_interval

    arguments:
    opt (namespace) -- commandline arguments for this run

    """
    self.log_dict, self.current_epoch = {}, 0
    self.bbox_interval = opt.bbox_interval
    if isinstance(opt.resume, str):
        modeldir, _ = self.download_model_artifact(opt)
        if modeldir:
            # Restore the resumed run's settings from its stored W&B config
            self.weights = Path(modeldir) / "last.pt"
            config = self.wandb_run.config
            opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp = str(
                self.weights), config.save_period, config.batch_size, config.bbox_interval, config.epochs, \
                config.hyp
    data_dict = self.data_dict
    if self.val_artifact is None:  # If --upload_dataset is set, use the existing artifact, don't download
        self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(data_dict.get('train'),
                                                                                       opt.artifact_alias)
        self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(data_dict.get('val'),
                                                                                   opt.artifact_alias)

    # Point the train/val entries at the downloaded artifact images, when available
    if self.train_artifact_path is not None:
        train_path = Path(self.train_artifact_path) / 'data/images/'
        data_dict['train'] = str(train_path)
    if self.val_artifact_path is not None:
        val_path = Path(self.val_artifact_path) / 'data/images/'
        data_dict['val'] = str(val_path)

    if self.val_artifact is not None:
        # Prepare the per-run evaluation artifact and table, and fetch the "val" table from the val artifact
        self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
        self.result_table = wandb.Table(["epoch", "id", "ground truth", "prediction", "avg_confidence"])
        self.val_table = self.val_artifact.get("val")
        if self.val_table_path_map is None:
            self.map_val_table_path()
    if opt.bbox_interval == -1:
        # -1 selects a default: every epochs // 10 epochs, or every epoch for runs of 10 epochs or fewer
        self.bbox_interval = opt.bbox_interval = (opt.epochs // 10) if opt.epochs > 10 else 1
    train_from_artifact = self.train_artifact_path is not None and self.val_artifact_path is not None
    # Update the data_dict to point to local artifacts dir
    if train_from_artifact:
        self.data_dict = data_dict
def download_dataset_artifact(self, path, alias):
    """
    download the dataset artifact if the path starts with WANDB_ARTIFACT_PREFIX

    arguments:
    path -- path of the dataset to be used for training
    alias (str)-- alias of the artifact to be downloaded/used for training

    returns:
    (str, wandb.Artifact) -- path of the downloaded dataset and its corresponding artifact object if the path
    is an artifact reference, otherwise (None, None)
    """
    if not (isinstance(path, str) and path.startswith(WANDB_ARTIFACT_PREFIX)):
        return None, None

    artifact_path = Path(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias)
    artifact_ref = artifact_path.as_posix().replace("\\", "/")  # always forward slashes
    dataset_artifact = wandb.use_artifact(artifact_ref)
    assert dataset_artifact is not None, "'Error: W&B dataset artifact doesn\'t exist'"
    return dataset_artifact.download(), dataset_artifact
def download_model_artifact(self, opt):
    """
    Download the model checkpoint artifact if the resume path starts with WANDB_ARTIFACT_PREFIX.

    arguments:
    opt (namespace) -- Commandline arguments for this run; only `opt.resume` is read

    returns:
    (str, wandb.Artifact) -- the value returned by the artifact's download() and the artifact object,
    or (None, None) when `opt.resume` is not a string carrying the W&B artifact prefix

    raises:
    AssertionError -- if the artifact cannot be resolved or its metadata has no 'total_epochs' entry
    """
    resume = opt.resume
    # Guard against a non-string resume value (e.g. a boolean flag), mirroring the
    # isinstance check in download_dataset_artifact; previously this raised AttributeError.
    if not (isinstance(resume, str) and resume.startswith(WANDB_ARTIFACT_PREFIX)):
        return None, None

    model_artifact = wandb.use_artifact(remove_prefix(resume, WANDB_ARTIFACT_PREFIX) + ":latest")
    assert model_artifact is not None, 'Error: W&B model artifact doesn\'t exist'
    modeldir = model_artifact.download()
    # The run is treated as non-resumable when the artifact carries no 'total_epochs' metadata
    total_epochs = model_artifact.metadata.get('total_epochs')
    is_finished = total_epochs is None
    assert not is_finished, 'training is finished, can only resume incomplete runs.'
    return modeldir, model_artifact
def log_model(self, path, opt, epoch, fitness_score, best_model=False):
    """
    Upload the latest checkpoint (last.pt) to W&B as a 'model' artifact.

    arguments:
    path (Path) -- Directory that contains the checkpoints
    opt (namespace) -- Command line arguments for this run (save_period, project, epochs are read)
    epoch (int) -- Current epoch number; stored in the metadata as epoch + 1
    fitness_score (float) -- Fitness score for the current epoch
    best_model (boolean) -- Whether this checkpoint is the best so far; adds the 'best' alias
    """
    artifact_name = 'run_' + wandb.run.id + '_model'
    metadata = {
        'original_url': str(path),
        'epochs_trained': epoch + 1,
        'save period': opt.save_period,
        'project': opt.project,
        'total_epochs': opt.epochs,
        'fitness_score': fitness_score,
    }
    model_artifact = wandb.Artifact(artifact_name, type='model', metadata=metadata)
    model_artifact.add_file(str(path / 'last.pt'), name='last.pt')

    # The last alias is an empty string when this is not the best checkpoint
    best_alias = 'best' if best_model else ''
    aliases = ['latest', 'last', 'epoch ' + str(self.current_epoch), best_alias]
    wandb.log_artifact(model_artifact, aliases=aliases)
    print("Saving model artifact on epoch ", epoch + 1)
def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False):
    """
    Build W&B artifacts for the train/val splits and write a data .yaml that points at them.

    arguments:
    data_file (str) -- the .yaml file describing the dataset (paths, classes, ...)
    single_cls (boolean) -- treat the data as a single class named 'item'
    project (str) -- project name, used to build the artifact references
    overwrite_config (boolean) -- when true the output is "<stem>.yaml", otherwise "<stem>_wandb.yaml"

    returns:
    path of the written .yaml file whose train/val entries are W&B artifact references
    """
    self.data_dict = check_dataset(data_file)  # parse and check
    data = dict(self.data_dict)
    if single_cls:
        nc, names = 1, ['item']
    else:
        nc, names = int(data['nc']), data['names']
    names = dict(enumerate(names))  # class index -> class name

    if data.get('train'):
        train_set = LoadImagesAndLabels(data['train'], rect=True, batch_size=1)
        self.train_artifact = self.create_dataset_table(train_set, names, name='train')
    else:
        self.train_artifact = None
    if data.get('val'):
        val_set = LoadImagesAndLabels(data['val'], rect=True, batch_size=1)
        self.val_artifact = self.create_dataset_table(val_set, names, name='val')
    else:
        self.val_artifact = None

    # Replace the local split locations with artifact references
    for split in ('train', 'val'):
        if data.get(split):
            data[split] = WANDB_ARTIFACT_PREFIX + str(Path(project) / split)

    stem = Path(data_file).stem
    path = (stem if overwrite_config else stem + '_wandb') + '.yaml'  # updated data.yaml path
    for dropped_key in ('download', 'path'):
        data.pop(dropped_key, None)
    with open(path, 'w') as f:
        yaml.safe_dump(data, f)

    if self.job_type != 'Training':
        self.wandb_run.log_artifact(self.train_artifact)
        self.wandb_run.log_artifact(self.val_artifact)
    else:  # builds correct artifact pipeline graph
        self.wandb_run.use_artifact(self.val_artifact)
        self.wandb_run.use_artifact(self.train_artifact)
        self.val_artifact.wait()
        self.val_table = self.val_artifact.get('val')
        self.map_val_table_path()
    return path
def map_val_table_path(self):
    """
    Rebuild self.val_table_path_map from the validation Table.

    Each row of self.val_table.data contributes row[3] -> row[0]; with the columns used by
    create_dataset_table that is file name -> row id. Useful for referencing artifacts for evaluation.
    """
    path_to_id = {}
    self.val_table_path_map = path_to_id
    print("Mapping dataset")
    for row in tqdm(self.val_table.data):
        path_to_id[row[3]] = row[0]
def create_dataset_table(self, dataset, class_to_id, name='dataset'):
    """
    Build a W&B dataset artifact holding the image/label files and a Table describing them.

    arguments:
    dataset (LoadImagesAndLabels) -- dataset instance that is iterated to build the Table
    class_to_id (dict(int, str)) -- hash map that maps class ids to labels
    name (str) -- name of the artifact (also the name the Table is stored under)

    returns:
    dataset artifact to be logged or used
    """
    # TODO: Explore multiprocessing to split this loop in parallel; this is essential for speeding up the logging
    artifact = wandb.Artifact(name=name, type="dataset")

    # A dataset backed by a single directory is added as a whole, otherwise file by file
    if isinstance(dataset.path, str) and Path(dataset.path).is_dir():
        img_files = tqdm([dataset.path])
    else:
        img_files = tqdm(dataset.img_files)

    for img_file in img_files:
        if Path(img_file).is_dir():
            artifact.add_dir(img_file, name='data/images')
            # Swap the last 'images' path component for 'labels'
            labels_path = 'labels'.join(dataset.path.rsplit('images', 1))
            artifact.add_dir(labels_path, name='data/labels')
            continue
        artifact.add_file(img_file, name='data/images/' + Path(img_file).name)
        label_file = Path(img2label_paths([img_file])[0])
        if label_file.exists():
            artifact.add_file(str(label_file), name='data/labels/' + label_file.name)

    table = wandb.Table(columns=["id", "train_image", "Classes", "name"])
    class_set = wandb.Classes([{'id': cls_id, 'name': cls_name} for cls_id, cls_name in class_to_id.items()])
    for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)):
        box_data = []
        img_classes = {}
        for cls, *xywh in labels[:, 1:].tolist():
            cls = int(cls)
            position = {"middle": [xywh[0], xywh[1]], "width": xywh[2], "height": xywh[3]}
            box_data.append({"position": position,
                             "class_id": cls,
                             "box_caption": "%s" % (class_to_id[cls])})
            img_classes[cls] = class_to_id[cls]
        boxes = {"ground_truth": {"box_data": box_data, "class_labels": class_to_id}}  # inference-space
        annotated = wandb.Image(paths, classes=class_set, boxes=boxes)
        table.add_data(si, annotated, list(img_classes.values()), Path(paths).name)
    artifact.add(table, name)
    return artifact
def log_training_progress(self, predn, path, names):
    """
    Append one row for the given image to the evaluation Table, reusing the validation Table image.

    arguments:
    predn (list): list of predictions in the native space in the format - [xmin, ymin, xmax, ymax, confidence, class]
    path (str): local path of the current evaluation image
    names (dict(int, str)): hash map that maps class ids to labels
    """
    class_set = wandb.Classes([{'id': cls_id, 'name': cls_name} for cls_id, cls_name in names.items()])
    box_data = []
    total_conf = 0
    for *xyxy, conf, cls in predn.tolist():
        if conf >= 0.25:  # only predictions at or above this confidence are logged
            position = {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}
            box_data.append({"position": position,
                             "class_id": int(cls),
                             "box_caption": "%s %.3f" % (names[cls], conf),
                             "scores": {"class_score": conf},
                             "domain": "pixel"})
            total_conf += conf
    boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space

    # Look up the validation Table row by file name
    row_id = self.val_table_path_map[Path(path).name]
    reference_image = self.val_table.data[row_id][1]
    avg_confidence = total_conf / max(1, len(box_data))
    self.result_table.add_data(self.current_epoch,
                               row_id,
                               reference_image,
                               wandb.Image(reference_image, boxes=boxes, classes=class_set),
                               avg_confidence)
def val_one_image(self, pred, predn, path, names, im):
    """
    Log validation data for one image: update the result Table when a validation Table is
    available, and collect an image for the bounding-box media panel on selected epochs.

    arguments:
    pred (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
    predn (list): list of predictions in the native space - [xmin, ymin, xmax, ymax, confidence, class]
    path (str): local path of the current evaluation image
    names (dict(int, str)): hash map that maps class ids to labels
    im: image handed to wandb.Image for the media panel
    """
    if self.val_table and self.result_table:  # Log Table if Val dataset is uploaded as artifact
        self.log_training_progress(predn, path, names)

    panel_has_room = len(self.bbox_media_panel_images) < self.max_imgs_to_log
    if not (panel_has_room and self.current_epoch > 0):
        return
    if self.current_epoch % self.bbox_interval != 0:
        return

    box_data = []
    for *xyxy, conf, cls in pred.tolist():
        box_data.append({"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
                         "class_id": int(cls),
                         "box_caption": "%s %.3f" % (names[cls], conf),
                         "scores": {"class_score": conf},
                         "domain": "pixel"})
    boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
    self.bbox_media_panel_images.append(wandb.Image(im, boxes=boxes, caption=path.name))
def log(self, log_dict):
    """
    Merge metrics into the pending logging dictionary (nothing happens without an active run).

    arguments:
    log_dict (Dict) -- metrics/media to be logged in current step
    """
    if not self.wandb_run:
        return
    self.log_dict.update(log_dict)
def end_epoch(self, best_result=False):
    """
    Send the pending log_dict, result artifact and Table to W&B, then reset them for the next epoch.

    arguments:
    best_result (boolean): Boolean representing if the result of this evaluation is best or not
    """
    if not self.wandb_run:
        return

    with all_logging_disabled():
        if self.bbox_media_panel_images:
            self.log_dict["Bounding Box Debugger/Images"] = self.bbox_media_panel_images
        wandb.log(self.log_dict)
        self.log_dict = {}
        self.bbox_media_panel_images = []

    if not self.result_artifact:
        return
    self.result_artifact.add(self.result_table, 'result')
    best_alias = 'best' if best_result else ''
    aliases = ['latest', 'last', 'epoch ' + str(self.current_epoch), best_alias]
    wandb.log_artifact(self.result_artifact, aliases=aliases)
    wandb.log({"evaluation": self.result_table})
    # Start a fresh Table/artifact pair for the following epoch
    self.result_table = wandb.Table(["epoch", "id", "ground truth", "prediction", "avg_confidence"])
    self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
def finish_run(self):
    """
    Flush any pending metrics and finish the current W&B run (no-op without an active run).
    """
    if not self.wandb_run:
        return
    if self.log_dict:
        with all_logging_disabled():
            wandb.log(self.log_dict)
    wandb.run.finish()
@contextmanager
def all_logging_disabled(highest_level=logging.CRITICAL):
    """ source - https://gist.github.com/simon-weber/7853144
    Context manager that silences logging for the duration of its body.

    The current global disable level is remembered, logging is disabled up to `highest_level`,
    and the remembered level is restored on exit even if the body raises.
    :param highest_level: the maximum logging level in use.
      This would only need to be changed if a custom level greater than CRITICAL is defined.
    """
    saved_disable_level = logging.root.manager.disable
    logging.disable(highest_level)
    try:
        yield
    finally:
        logging.disable(saved_disable_level)
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Loss functions
"""
import torch
import torch.nn as nn
from yolov5processor.utils.metrics import bbox_iou
from yolov5processor.utils.torch_utils import is_parallel
def smooth_BCE(eps=0.1):  # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
    """Return the (positive, negative) label-smoothing BCE targets for smoothing amount `eps`."""
    negative = 0.5 * eps
    positive = 1.0 - negative
    return positive, negative
class BCEBlurWithLogitsLoss(nn.Module):
    """BCEWithLogitsLoss variant with reduced missing label effects."""

    def __init__(self, alpha=0.05):
        super().__init__()
        self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none')  # must be nn.BCEWithLogitsLoss()
        self.alpha = alpha

    def forward(self, pred, true):
        """Return the mean of the element-wise BCE loss scaled by the blur factor."""
        elementwise = self.loss_fcn(pred, true)
        prob = torch.sigmoid(pred)  # prob from logits
        # Signed difference: only confident positives on zero targets are damped (missing labels)
        # dx = (pred - true).abs()  # reduce missing label and false label effects
        dx = prob - true
        blur = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4))
        return (elementwise * blur).mean()
class FocalLoss(nn.Module):
    """Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)"""

    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        super().__init__()
        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        # Remember the requested reduction, then switch the wrapped loss to element-wise output
        self.reduction = loss_fcn.reduction
        self.loss_fcn.reduction = 'none'  # required to apply FL to each element

    def forward(self, pred, true):
        """Return the focal-weighted loss, reduced according to the wrapped loss' original reduction."""
        elementwise = self.loss_fcn(pred, true)

        # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
        pred_prob = torch.sigmoid(pred)  # prob from logits
        p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
        modulating_factor = (1.0 - p_t) ** self.gamma
        focal = elementwise * (alpha_factor * modulating_factor)

        if self.reduction == 'mean':
            return focal.mean()
        if self.reduction == 'sum':
            return focal.sum()
        return focal  # 'none'
class QFocalLoss(nn.Module):
    """Wraps Quality focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)"""

    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        super().__init__()
        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        # Remember the requested reduction, then switch the wrapped loss to element-wise output
        self.reduction = loss_fcn.reduction
        self.loss_fcn.reduction = 'none'  # required to apply FL to each element

    def forward(self, pred, true):
        """Return the quality-focal-weighted loss, reduced per the wrapped loss' original reduction."""
        elementwise = self.loss_fcn(pred, true)

        pred_prob = torch.sigmoid(pred)  # prob from logits
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
        # Modulation grows with the distance between the target and the predicted probability
        modulating_factor = torch.abs(true - pred_prob) ** self.gamma
        weighted = elementwise * (alpha_factor * modulating_factor)

        if self.reduction == 'mean':
            return weighted.mean()
        if self.reduction == 'sum':
            return weighted.sum()
        return weighted  # 'none'
class ComputeLoss:
    """Compute box, objectness and classification losses for a detection model.

    The instance is built from a model (its hyperparameters and last `model[-1]` module) and is
    then called with per-layer predictions and targets.
    """

    # Compute losses
    def __init__(self, model, autobalance=False):
        """Read hyperparameters from `model.hyp` and detection-layer attributes from `model.model[-1]`.

        autobalance -- when true, the per-layer objectness weights in self.balance are updated on
        every call and normalised by the entry of the stride-16 layer.
        """
        self.sort_obj_iou = False
        device = next(model.parameters()).device  # get model device
        h = model.hyp  # hyperparameters

        # Define criteria
        BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device))
        BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))

        # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
        self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0))  # positive, negative BCE targets

        # Focal loss
        g = h['fl_gamma']  # focal loss gamma
        if g > 0:
            # Wrap both criteria; a gamma of 0 or less keeps plain BCE
            BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

        det = model.module.model[-1] if is_parallel(model) else model.model[-1]  # Detect() module
        # Objectness weight per layer: 3 entries for a 3-layer head, otherwise the 5-entry default
        self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02])  # P3-P7
        self.ssi = list(det.stride).index(16) if autobalance else 0  # stride 16 index
        self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
        # Copy the detection-layer attributes used by __call__ and build_targets onto self
        for k in 'na', 'nc', 'nl', 'anchors':
            setattr(self, k, getattr(det, k))

    def __call__(self, p, targets):  # predictions, targets, model
        """Return (total loss scaled by batch size, detached tensor of [lbox, lobj, lcls]).

        p -- iterable of per-layer prediction tensors, indexed below as [image, anchor, gridy, gridx, channel]
        targets -- target rows in the layout expected by build_targets (image, class, x, y, w, h)
        """
        device = targets.device
        lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
        tcls, tbox, indices, anchors = self.build_targets(p, targets)  # targets

        # Losses
        for i, pi in enumerate(p):  # layer index, layer predictions
            b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
            tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj

            n = b.shape[0]  # number of targets
            if n:
                ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets

                # Regression
                pxy = ps[:, :2].sigmoid() * 2. - 0.5
                pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
                pbox = torch.cat((pxy, pwh), 1)  # predicted box
                iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
                lbox += (1.0 - iou).mean()  # iou loss

                # Objectness
                score_iou = iou.detach().clamp(0).type(tobj.dtype)
                if self.sort_obj_iou:
                    # Ascending sort, so for duplicate cells the assignment below sees the highest IoU last
                    sort_id = torch.argsort(score_iou)
                    b, a, gj, gi, score_iou = b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id]
                tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou  # iou ratio

                # Classification
                if self.nc > 1:  # cls loss (only if multiple classes)
                    t = torch.full_like(ps[:, 5:], self.cn, device=device)  # targets
                    t[range(n), tcls[i]] = self.cp
                    lcls += self.BCEcls(ps[:, 5:], t)  # BCE

                # Append targets to text file
                # with open('targets.txt', 'a') as file:
                #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]

            # Objectness loss is computed for every layer, including layers without targets
            obji = self.BCEobj(pi[..., 4], tobj)
            lobj += obji * self.balance[i]  # obj loss
            if self.autobalance:
                self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()

        if self.autobalance:
            # Normalise so that the stride-16 layer has weight 1
            self.balance = [x / self.balance[self.ssi] for x in self.balance]
        lbox *= self.hyp['box']
        lobj *= self.hyp['obj']
        lcls *= self.hyp['cls']
        bs = tobj.shape[0]  # batch size (taken from the last layer's objectness target)

        return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach()

    def build_targets(self, p, targets):
        """Match targets to anchors per layer and return (tcls, tbox, indices, anch), one entry per layer."""
        # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
        na, nt = self.na, targets.shape[0]  # number of anchors, targets
        tcls, tbox, indices, anch = [], [], [], []
        gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
        ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
        targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices

        g = 0.5  # bias
        off = torch.tensor([[0, 0],
                            [1, 0], [0, 1], [-1, 0], [0, -1],  # j,k,l,m
                            # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
                            ], device=targets.device).float() * g  # offsets

        for i in range(self.nl):
            anchors = self.anchors[i]
            gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain

            # Match targets to anchors
            t = targets * gain
            if nt:
                # Matches
                r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
                j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t']  # compare
                # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
                t = t[j]  # filter

                # Offsets
                gxy = t[:, 2:4]  # grid xy
                gxi = gain[[2, 3]] - gxy  # inverse
                j, k = ((gxy % 1. < g) & (gxy > 1.)).T
                l, m = ((gxi % 1. < g) & (gxi > 1.)).T
                # Row 0 keeps every target in its own cell; rows 1-4 select the neighbouring-cell copies
                j = torch.stack((torch.ones_like(j), j, k, l, m))
                t = t.repeat((5, 1, 1))[j]
                offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
            else:
                t = targets[0]
                offsets = 0

            # Define
            b, c = t[:, :2].long().T  # image, class
            gxy = t[:, 2:4]  # grid xy
            gwh = t[:, 4:6]  # grid wh
            gij = (gxy - offsets).long()
            gi, gj = gij.T  # grid xy indices

            # Append
            a = t[:, 6].long()  # anchor indices
            indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)))  # image, anchor, grid indices
            tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
            anch.append(anchors[a])  # anchors
            tcls.append(c)  # class

        return tcls, tbox, indices, anch
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Model validation metrics
"""
import math
import warnings
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import torch
def fitness(x):
    """Return the model fitness per row of `x` as a weighted sum of its first four columns."""
    weights = [0.0, 0.0, 0.1, 0.9]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
    metrics = x[:, :4]
    return (metrics * weights).sum(1)
def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=()):
    """ Compute per-class average precision from detections sorted by confidence.
    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp:  True positives (nparray, nx1 or nx10).
        conf:  Objectness value from 0-1 (nparray).
        pred_cls:  Predicted object classes (nparray).
        target_cls:  True object classes (nparray).
        plot:  Plot precision-recall curve at mAP@0.5
        save_dir:  Plot save directory
        names:  Class names forwarded to the plotting helpers
    # Returns
        (p, r, ap, f1, classes): precision, recall and F1 per class taken at the confidence index
        that maximises mean F1, the AP array of shape (classes, tp.shape[1]), and the unique
        target classes as int32.
    """
    # Sort by objectness (descending)
    order = np.argsort(-conf)
    tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]

    # Find unique classes
    unique_classes = np.unique(target_cls)
    nc = unique_classes.shape[0]  # number of classes

    # Create Precision-Recall curve and compute AP for each class
    px, py = np.linspace(0, 1, 1000), []  # for plotting
    ap = np.zeros((nc, tp.shape[1]))
    p = np.zeros((nc, 1000))
    r = np.zeros((nc, 1000))
    for ci, c in enumerate(unique_classes):
        mask = pred_cls == c
        n_l = (target_cls == c).sum()  # number of labels
        n_p = mask.sum()  # number of predictions
        if n_p == 0 or n_l == 0:
            continue  # rows for this class stay zero

        # Accumulate FPs and TPs
        tp_c = tp[mask]
        fpc = (1 - tp_c).cumsum(0)
        tpc = tp_c.cumsum(0)
        neg_conf = -conf[mask]  # negated because np.interp needs increasing x-coordinates

        # Recall
        recall = tpc / (n_l + 1e-16)  # recall curve
        r[ci] = np.interp(-px, neg_conf, recall[:, 0], left=0)

        # Precision
        precision = tpc / (tpc + fpc)  # precision curve
        p[ci] = np.interp(-px, neg_conf, precision[:, 0], left=1)  # p at pr_score

        # AP from recall-precision curve, one value per column of tp
        for j in range(tp.shape[1]):
            ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
            if plot and j == 0:
                py.append(np.interp(px, mrec, mpre))  # precision at mAP@0.5

    # Compute F1 (harmonic mean of precision and recall)
    f1 = 2 * p * r / (p + r + 1e-16)
    if plot:
        out_dir = Path(save_dir)
        plot_pr_curve(px, py, ap, out_dir / 'PR_curve.png', names)
        plot_mc_curve(px, f1, out_dir / 'F1_curve.png', names, ylabel='F1')
        plot_mc_curve(px, p, out_dir / 'P_curve.png', names, ylabel='Precision')
        plot_mc_curve(px, r, out_dir / 'R_curve.png', names, ylabel='Recall')

    best = f1.mean(0).argmax()  # max F1 index
    return p[:, best], r[:, best], ap, f1[:, best], unique_classes.astype('int32')
def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves
    # Arguments
        recall:    The recall curve (list)
        precision: The precision curve (list)
    # Returns
        Average precision, precision curve, recall curve
        (the returned curves include the sentinel points and the precision envelope)
    """
    # Append sentinel values to beginning and end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([1.0], precision, [0.0]))

    # Compute the precision envelope (running maximum taken from the right)
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

    # Integrate area under curve
    method = 'interp'  # methods: 'continuous', 'interp'
    if method == 'interp':
        x = np.linspace(0, 1, 101)  # 101-point interp (COCO)
        # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid; use the new name
        # when it exists and fall back to np.trapz on older NumPy releases.
        trapezoid = getattr(np, 'trapezoid', None) or np.trapz
        ap = trapezoid(np.interp(x, mrec, mpre), x)  # integrate
    else:  # 'continuous'
        i = np.where(mrec[1:] != mrec[:-1])[0]  # points where x axis (recall) changes
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under curve

    return ap, mpre, mrec
class ConfusionMatrix:
    """Accumulates a detection confusion matrix with one extra 'background' row and column.

    Rows are indexed by predicted class and columns by true class (see process_batch and the
    axis labels in plot); index `nc` is the background entry.
    """

    # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
    def __init__(self, nc, conf=0.25, iou_thres=0.45):
        """Create an all-zero (nc + 1) x (nc + 1) matrix and store the confidence / IoU thresholds."""
        self.matrix = np.zeros((nc + 1, nc + 1))
        self.nc = nc  # number of classes
        self.conf = conf
        self.iou_thres = iou_thres

    def process_batch(self, detections, labels):
        """
        Update the confusion matrix with one batch of detections and ground-truth labels.
        Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
        Arguments:
            detections (Array[N, 6]), x1, y1, x2, y2, conf, class
            labels (Array[M, 5]), class, x1, y1, x2, y2
        Returns:
            None, updates confusion matrix accordingly
        """
        # Keep only detections above the confidence threshold
        detections = detections[detections[:, 4] > self.conf]
        gt_classes = labels[:, 0].int()
        detection_classes = detections[:, 5].int()
        iou = box_iou(labels[:, 1:], detections[:, :4])

        # Candidate (label index, detection index) pairs whose IoU exceeds the threshold
        x = torch.where(iou > self.iou_thres)
        if x[0].shape[0]:
            # Columns of matches: label index, detection index, IoU
            matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
            if x[0].shape[0] > 1:
                # Sort by IoU descending, then keep the first (highest-IoU) row per detection ...
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                # ... and again per label, so each label and each detection is matched at most once
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
        else:
            matches = np.zeros((0, 3))

        n = matches.shape[0] > 0
        # NOTE(review): indices are cast to int16 here — presumably fine for per-batch counts, confirm
        m0, m1, _ = matches.transpose().astype(np.int16)
        for i, gc in enumerate(gt_classes):
            j = m0 == i
            if n and sum(j) == 1:
                self.matrix[detection_classes[m1[j]], gc] += 1  # correct
            else:
                self.matrix[self.nc, gc] += 1  # background FP

        if n:
            # Detections that were not matched to any label are counted in the background column
            for i, dc in enumerate(detection_classes):
                if not any(m1 == i):
                    self.matrix[dc, self.nc] += 1  # background FN

    def matrix(self):
        # NOTE(review): __init__ assigns an instance attribute named `matrix`, which shadows this
        # method on instances; `instance.matrix` therefore yields the array, not this method.
        return self.matrix

    def plot(self, normalize=True, save_dir='', names=()):
        """Save the matrix as a heatmap to <save_dir>/confusion_matrix.png.

        Any exception raised while plotting is caught and reported with a printed warning.
        """
        try:
            import seaborn as sn

            array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-6) if normalize else 1)  # normalize columns
            array[array < 0.005] = np.nan  # don't annotate (would appear as 0.00)

            fig = plt.figure(figsize=(12, 9), tight_layout=True)
            sn.set(font_scale=1.0 if self.nc < 50 else 0.8)  # for label size
            labels = (0 < len(names) < 99) and len(names) == self.nc  # apply names to ticklabels
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')  # suppress empty matrix RuntimeWarning: All-NaN slice encountered
                sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True,
                           xticklabels=names + ['background FP'] if labels else "auto",
                           yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1))
            fig.axes[0].set_xlabel('True')
            fig.axes[0].set_ylabel('Predicted')
            fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
            plt.close()
        except Exception as e:
            print(f'WARNING: ConfusionMatrix plot failure: {e}')

    def print(self):
        """Print the matrix to stdout, one space-separated row per line."""
        for i in range(self.nc + 1):
            print(' '.join(map(str, self.matrix[i])))
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    """Return the IoU (or GIoU / DIoU / CIoU when requested) of box1 to box2.

    box1 is 4, box2 is nx4 (it is transposed internally). Boxes are x1, y1, x2, y2 when
    `x1y1x2y2` is true, otherwise centre-x, centre-y, width, height.
    """
    box2 = box2.T

    # Get the corner coordinates of both boxes
    if x1y1x2y2:
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:  # transform from xywh to xyxy
        half_w1, half_h1 = box1[2] / 2, box1[3] / 2
        half_w2, half_h2 = box2[2] / 2, box2[3] / 2
        b1_x1, b1_x2 = box1[0] - half_w1, box1[0] + half_w1
        b1_y1, b1_y2 = box1[1] - half_h1, box1[1] + half_h1
        b2_x1, b2_x2 = box2[0] - half_w2, box2[0] + half_w2
        b2_y1, b2_y2 = box2[1] - half_h2, box2[1] + half_h2

    # Intersection area
    overlap_w = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0)
    overlap_h = (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    inter = overlap_w * overlap_h

    # Union Area
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps

    iou = inter / union
    if not (GIoU or DIoU or CIoU):
        return iou  # IoU

    cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (smallest enclosing box) width
    ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
    if not (CIoU or DIoU):  # GIoU https://arxiv.org/pdf/1902.09630.pdf
        c_area = cw * ch + eps  # convex area
        return iou - (c_area - union) / c_area  # GIoU

    # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
    c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
    rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
            (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center distance squared
    if DIoU:
        return iou - rho2 / c2  # DIoU

    # CIoU https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
    v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
    with torch.no_grad():  # alpha is treated as a constant for the gradient
        alpha = v / (v - iou + (1 + eps))
    return iou - (rho2 / c2 + v * alpha)  # CIoU
def box_iou(box1, box2):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """
    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])

    # Broadcast box1 against box2: inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    right_bottom = torch.min(box1[:, None, 2:], box2[:, 2:])
    left_top = torch.max(box1[:, None, :2], box2[:, :2])
    inter = (right_bottom - left_top).clamp(0).prod(2)

    union = area1[:, None] + area2 - inter
    return inter / union  # iou = inter / (area1 + area2 - inter)
def bbox_ioa(box1, box2, eps=1E-7):
    """ Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2
    box1:       np.array of shape(4)
    box2:       np.array of shape(nx4)
    returns:    np.array of shape(n)
    """
    box2 = box2.transpose()

    # Get the coordinates of bounding boxes
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]

    # Intersection area (negative overlaps are clipped to zero)
    overlap_w = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0)
    overlap_h = (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
    inter_area = overlap_w * overlap_h

    # box2 area; eps keeps the division defined for zero-area boxes
    box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps

    # Intersection over box2 area
    return inter_area / box2_area
def wh_iou(wh1, wh2):
    """Returns the nxm IoU matrix of width-height pairs. wh1 is nx2, wh2 is mx2"""
    a = wh1[:, None]  # [N,1,2]
    b = wh2[None]  # [1,M,2]
    inter = torch.min(a, b).prod(2)  # [N,M]
    union = a.prod(2) + b.prod(2) - inter
    return inter / union  # iou = inter / (area1 + area2 - inter)
# Plots ----------------------------------------------------------------------------------------------------------------
def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()):
    """Plot the precision-recall curve(s) and save the figure to `save_dir` (a file path)."""
    fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
    py = np.stack(py, axis=1)

    per_class_legend = 0 < len(names) < 21  # display per-class legend if < 21 classes
    if per_class_legend:
        for i, y in enumerate(py.T):
            ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}')  # plot(recall, precision)
    else:
        ax.plot(px, py, linewidth=1, color='grey')  # plot(recall, precision)

    # Mean curve over all classes
    mean_label = 'all classes %.3f mAP@0.5' % ap[:, 0].mean()
    ax.plot(px, py.mean(1), linewidth=3, color='blue', label=mean_label)
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
    fig.savefig(Path(save_dir), dpi=250)
    plt.close()
def plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence', ylabel='Metric'):
    """Plot a metric-confidence curve per class plus the class mean, and save it to `save_dir` (a file path)."""
    fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)

    per_class_legend = 0 < len(names) < 21  # display per-class legend if < 21 classes
    if per_class_legend:
        for i, curve in enumerate(py):
            ax.plot(px, curve, linewidth=1, label=f'{names[i]}')  # plot(confidence, metric)
    else:
        ax.plot(px, py.T, linewidth=1, color='grey')  # plot(confidence, metric)

    # Mean curve; the label reports its peak value and the confidence at which it occurs
    y = py.mean(0)
    mean_label = f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}'
    ax.plot(px, y, linewidth=3, color='blue', label=mean_label)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
    fig.savefig(Path(save_dir), dpi=250)
    plt.close()
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Plotting utils
"""
import math
import os
from copy import copy
from pathlib import Path
import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
import torch
from PIL import Image, ImageDraw, ImageFont
from yolov5processor.utils.general import user_config_dir, is_ascii, is_chinese, xywh2xyxy, xyxy2xywh
from yolov5processor.utils.metrics import fitness
# Settings
CONFIG_DIR = user_config_dir() # Ultralytics settings dir
RANK = int(os.getenv('RANK', -1))
matplotlib.rc('font', **{'size': 11})
matplotlib.use('Agg') # for writing to files only
class Colors:
    """Ultralytics color palette https://ultralytics.com/"""

    def __init__(self):
        # hex = matplotlib.colors.TABLEAU_COLORS.values()
        hex_codes = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334',
                     '00D4BB', '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF',
                     'FF95C8', 'FF37C7')
        self.palette = [self.hex2rgb('#' + code) for code in hex_codes]
        self.n = len(self.palette)

    def __call__(self, i, bgr=False):
        """Return palette entry `i` (wrapping around the palette) as RGB, or as BGR when `bgr` is true."""
        rgb = self.palette[int(i) % self.n]
        if bgr:
            return rgb[2], rgb[1], rgb[0]
        return rgb

    @staticmethod
    def hex2rgb(h):  # rgb order (PIL)
        """Convert a '#RRGGBB' string to an (r, g, b) tuple of ints."""
        digits = h[1:]
        return tuple(int(digits[k:k + 2], 16) for k in (0, 2, 4))


colors = Colors()  # create instance for 'from utils.plots import colors'
def check_font(font='Arial.ttf', size=10):
    """Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary."""
    font_path = Path(font)
    if not font_path.exists():
        # Fall back to the copy (or download target) in the settings directory
        font_path = CONFIG_DIR / font_path.name
    try:
        source = str(font_path) if font_path.exists() else font_path.name
        return ImageFont.truetype(source, size)
    except Exception:  # download if missing
        url = "https://ultralytics.com/assets/" + font_path.name
        print(f'Downloading {url} to {font_path}...')
        torch.hub.download_url_to_file(url, str(font_path), progress=False)
        return ImageFont.truetype(str(font_path), size)
class Annotator:
    """Draws boxes, labels and text on an image using either PIL or cv2."""

    # Executed once, when the class body is evaluated (i.e. at import of this module)
    if RANK in (-1, 0):
        check_font()  # download TTF if necessary

    # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations
    def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'):
        """Wrap `im` for annotation.

        PIL drawing is selected when `pil` is true or when `example` is non-ASCII / Chinese;
        otherwise the image is kept as-is for cv2 drawing.
        NOTE(review): `im.data` and `im.shape` are read unconditionally, so `im` is presumably a
        numpy array even though the PIL branch also accepts an Image — confirm with callers.
        """
        assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.'
        self.pil = pil or not is_ascii(example) or is_chinese(example)
        if self.pil:  # use PIL
            self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
            self.draw = ImageDraw.Draw(self.im)
            self.font = check_font(font='Arial.Unicode.ttf' if is_chinese(example) else font,
                                   size=font_size or max(round(sum(self.im.size) / 2 * 0.035), 12))
        else:  # use cv2
            self.im = im
        # Line width is needed by both drawing back-ends (box_label reads it in the PIL branch too)
        self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2)  # line width

    def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
        """Draw one xyxy box on the image, with an optional filled label above (or inside) it."""
        # Add one xyxy box to image with label
        # NOTE(review): a non-ASCII label takes the PIL path even when self.pil is false, in which
        # case self.draw / self.font were never created in __init__ — confirm this cannot happen.
        if self.pil or not is_ascii(label):
            self.draw.rectangle(box, width=self.lw, outline=color)  # box
            if label:
                # NOTE(review): ImageFont.getsize is unavailable in recent Pillow releases — verify the pinned version
                w, h = self.font.getsize(label)  # text width, height
                outside = box[1] - h >= 0  # label fits outside box
                self.draw.rectangle([box[0],
                                     box[1] - h if outside else box[1],
                                     box[0] + w + 1,
                                     box[1] + 1 if outside else box[1] + h + 1], fill=color)
                # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls')  # for PIL>8.0
                self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font)
        else:  # cv2
            p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
            cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA)
            if label:
                tf = max(self.lw - 1, 1)  # font thickness
                w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0]  # text width, height
                outside = p1[1] - h - 3 >= 0  # label fits outside box
                # p2 is reused as the opposite corner of the filled label background
                p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
                cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA)  # filled
                cv2.putText(self.im, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, self.lw / 3, txt_color,
                            thickness=tf, lineType=cv2.LINE_AA)

    def rectangle(self, xy, fill=None, outline=None, width=1):
        """Draw a rectangle via self.draw, which only exists when PIL drawing was selected."""
        # Add rectangle to image (PIL-only)
        self.draw.rectangle(xy, fill, outline, width)

    def text(self, xy, text, txt_color=(255, 255, 255)):
        """Draw `text` with its bottom edge near xy[1], via self.draw / self.font (PIL drawing only)."""
        # Add text to image (PIL-only)
        w, h = self.font.getsize(text)  # text width, height
        self.draw.text((xy[0], xy[1] - h + 1), text, fill=txt_color, font=self.font)

    def result(self):
        """Return the annotated image converted with np.asarray."""
        # Return annotated image as array
        return np.asarray(self.im)
def hist2d(x, y, n=100):
    # 2d histogram used in labels.png and evolve.png
    # Returns, for every (x, y) sample, the natural log of the count of the histogram bin it falls into.
    x_bins = np.linspace(x.min(), x.max(), n)
    y_bins = np.linspace(y.min(), y.max(), n)
    counts, x_bins, y_bins = np.histogram2d(x, y, (x_bins, y_bins))
    rows, cols = counts.shape
    # digitize is 1-based and puts the maximum past the last bin, hence the shift and the clip
    ix = np.clip(np.digitize(x, x_bins) - 1, 0, rows - 1)
    iy = np.clip(np.digitize(y, y_bins) - 1, 0, cols - 1)
    return np.log(counts[ix, iy])
def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5):
    # Low-pass Butterworth filter applied forwards and backwards over data
    # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy
    from scipy.signal import butter, filtfilt

    nyquist = 0.5 * fs
    normalized_cutoff = cutoff / nyquist  # cutoff as a fraction of the Nyquist frequency
    b, a = butter(order, normalized_cutoff, btype='low', analog=False)
    return filtfilt(b, a, data)  # forward-backward filter
def output_to_target(output):
    # Convert model output to target format [batch_id, class_id, x, y, w, h, conf]
    # `output` is iterated per image; each row is unpacked as (*box, conf, cls)
    rows = []
    for batch_id, detections in enumerate(output):
        for *xyxy, conf, cls in detections.cpu().numpy():
            xywh = xyxy2xywh(np.array(xyxy)[None])  # helper is fed a single-row 2D array
            rows.append([batch_id, cls, *list(*xywh), conf])
    return np.array(rows)
def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=1920, max_subplots=16):
    # Plot image grid with labels and save it to `fname`
    # images: batch indexed as (batch, channel, height, width), torch.Tensor or numpy array
    # targets: rows of [batch_id, class_id, x, y, w, h] (labels) or with a 7th conf column (predictions)
    # paths: optional per-image file paths whose names are written under each tile
    # names: optional class-id -> name lookup used for the box labels
    # max_size: upper bound on the mosaic side length; max_subplots: maximum number of tiles drawn
    if isinstance(images, torch.Tensor):
        images = images.cpu().float().numpy()
    if isinstance(targets, torch.Tensor):
        targets = targets.cpu().numpy()
    if np.max(images[0]) <= 1:
        # de-normalise (optional); build a new array so the caller's data (possibly memory shared with
        # the input tensor) is not rescaled in place
        images = images * 255.0
    bs, _, h, w = images.shape  # batch size, _, height, width
    bs = min(bs, max_subplots)  # limit plot images
    ns = np.ceil(bs ** 0.5)  # number of subplots (square)

    # Build Image
    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
    for i in range(bs):  # only the first `bs` images fit in the grid
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
        mosaic[y:y + h, x:x + w, :] = images[i].transpose(1, 2, 0)

    # Resize (optional)
    scale = max_size / ns / max(h, w)
    if scale < 1:
        h = math.ceil(scale * h)
        w = math.ceil(scale * w)
        mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h)))

    # Annotate
    fs = int((h + w) * ns * 0.01)  # font size
    annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True)
    # Iterate exactly over the tiles that were drawn. The previous `range(i + 1)` reused the build loop's
    # variable, which ran one tile too far for batches larger than max_subplots and failed on empty batches.
    for i in range(bs):
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
        annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2)  # borders
        if paths:
            annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220))  # filenames
        if len(targets) > 0:
            ti = targets[targets[:, 0] == i]  # image targets
            boxes = xywh2xyxy(ti[:, 2:6]).T
            classes = ti[:, 1].astype('int')
            labels = ti.shape[1] == 6  # labels if no conf column
            conf = None if labels else ti[:, 6]  # check for confidence presence (label vs pred)

            if boxes.shape[1]:
                if boxes.max() <= 1.01:  # if normalized with tolerance 0.01
                    boxes[[0, 2]] *= w  # scale to pixels
                    boxes[[1, 3]] *= h
                elif scale < 1:  # absolute coords need scale if image scales
                    boxes *= scale
            boxes[[0, 2]] += x  # shift into this tile
            boxes[[1, 3]] += y
            for j, box in enumerate(boxes.T.tolist()):
                cls = classes[j]
                color = colors(cls)
                cls = names[cls] if names else cls
                if labels or conf[j] > 0.25:  # 0.25 conf thresh
                    label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}'
                    annotator.box_label(box, label, color=color)
    annotator.im.save(fname)  # save
def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):
    # Plot LR simulating training for full epochs; the figure is written to <save_dir>/LR.png
    # Works on shallow copies (copy.copy) of the optimizer and scheduler
    optimizer = copy(optimizer)
    scheduler = copy(scheduler)
    lr_history = []
    for _ in range(epochs):
        scheduler.step()
        lr_history.append(optimizer.param_groups[0]['lr'])  # LR of the first parameter group
    plt.plot(lr_history, '.-', label='LR')
    plt.xlabel('epoch')
    plt.ylabel('LR')
    plt.grid()
    plt.xlim(0, epochs)
    plt.ylim(0)
    out_file = Path(save_dir) / 'LR.png'
    plt.savefig(out_file, dpi=200)
    plt.close()
def plot_val_txt():  # from utils.plots import *; plot_val()
    # Plot val.txt histograms: a 2D histogram of box centres (hist2d.png) and per-axis 1D histograms (hist1d.png)
    data = np.loadtxt('val.txt', dtype=np.float32)
    xywh = xyxy2xywh(data[:, :4])  # first four columns are converted from xyxy to xywh
    cx = xywh[:, 0]
    cy = xywh[:, 1]

    _, ax2d = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True)
    ax2d.hist2d(cx, cy, bins=600, cmax=10, cmin=0)
    ax2d.set_aspect('equal')
    plt.savefig('hist2d.png', dpi=300)

    _, axes = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True)
    for axis, centres in zip(axes, (cx, cy)):
        axis.hist(centres, bins=600)
    plt.savefig('hist1d.png', dpi=200)
def plot_targets_txt():  # from utils.plots import *; plot_targets_txt()
    # Plot targets.txt histograms: one panel per row of the transposed file, saved to targets.jpg
    columns = np.loadtxt('targets.txt', dtype=np.float32).T
    titles = ['x targets', 'y targets', 'width targets', 'height targets']
    _, grid = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)
    panels = grid.ravel()
    for idx, title in enumerate(titles):
        values = columns[idx]
        panels[idx].hist(values, bins=100, label='%.3g +/- %.3g' % (values.mean(), values.std()))  # mean +/- std
        panels[idx].legend()
        panels[idx].set_title(title)
    plt.savefig('targets.jpg', dpi=200)
def plot_val_study(file='', dir='', x=None):  # from utils.plots import *; plot_val_study()
    # Plot file=study.txt generated by val.py (or plot all study*.txt in dir); output is <save_dir>/study.png
    save_dir = Path(file).parent if file else Path(dir)
    show_details = False  # plot additional results
    if show_details:
        ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel()

    ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True)[1]
    # for f in [save_dir / f'study_coco_{x}.txt' for x in ['yolov5n6', 'yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]:
    for study_file in sorted(save_dir.glob('study*.txt')):
        y = np.loadtxt(study_file, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T
        x = np.arange(y.shape[1]) if x is None else np.array(x)
        if show_details:
            titles = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_preprocess (ms/img)', 't_inference (ms/img)',
                      't_NMS (ms/img)']
            for i, title in enumerate(titles):
                ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8)
                ax[i].set_title(title)

        best = y[3].argmax() + 1  # curve is cut just after the row with the highest value in column 3
        curve_label = study_file.stem.replace('study_coco_', '').replace('yolo', 'YOLO')
        ax2.plot(y[5, 1:best], y[3, 1:best] * 1E2, '.-', linewidth=2, markersize=8, label=curve_label)

    # Fixed reference curve
    ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5],
             'k.-', linewidth=2, markersize=8, alpha=.25, label='EfficientDet')

    ax2.grid(alpha=0.2)
    ax2.set_yticks(np.arange(20, 60, 5))
    ax2.set_xlim(0, 57)
    ax2.set_ylim(25, 55)
    ax2.set_xlabel('GPU Speed (ms/img)')
    ax2.set_ylabel('COCO AP val')
    ax2.legend(loc='lower right')
    out_file = save_dir / 'study.png'
    print(f'Saving {out_file}...')
    plt.savefig(out_file, dpi=300)
def plot_labels(labels, names=(), save_dir=Path('')):
    # Plot dataset labels: writes labels_correlogram.jpg and labels.jpg into save_dir
    # labels: numpy array with the class in column 0 and the box (x, y, width, height) in columns 1-4
    # names: optional class names used as x tick labels when there are fewer than 30
    print('Plotting labels... ')
    c, b = labels[:, 0], labels[:, 1:].transpose()  # classes, boxes
    nc = int(c.max() + 1)  # number of classes
    x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height'])

    # seaborn correlogram
    sn.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
    plt.savefig(save_dir / 'labels_correlogram.jpg', dpi=200)
    plt.close()

    # matplotlib labels
    matplotlib.use('svg')  # faster
    ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel()
    ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8)
    # [y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)]  # update colors bug #3195
    ax[0].set_ylabel('instances')
    if 0 < len(names) < 30:
        ax[0].set_xticks(range(len(names)))
        ax[0].set_xticklabels(names, rotation=90, fontsize=10)
    else:
        ax[0].set_xlabel('classes')
    sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9)
    sn.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9)

    # rectangles: draw the first 1000 boxes centred on a 2000x2000 white canvas.
    # Work on a copy so the caller's label array is not overwritten.
    rects = labels.copy()
    rects[:, 1:3] = 0.5  # center
    rects[:, 1:] = xywh2xyxy(rects[:, 1:]) * 2000
    img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255)
    draw = ImageDraw.Draw(img)  # one drawing context for all rectangles
    for cls, *box in rects[:1000]:
        draw.rectangle(box, width=1, outline=colors(cls))  # plot
    ax[1].imshow(img)
    ax[1].axis('off')

    for a in [0, 1, 2, 3]:
        for s in ['top', 'right', 'left', 'bottom']:
            ax[a].spines[s].set_visible(False)

    plt.savefig(save_dir / 'labels.jpg', dpi=200)
    matplotlib.use('Agg')
    plt.close()
def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
    # Plot iDetection '*.txt' per-image logs. from utils.plots import *; profile_idetection()
    # Reads every frames*.txt in save_dir and writes idetection_profile.png next to them.
    panels = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel()
    titles = ['Images', 'Free Storage (GB)', 'RAM Usage (GB)', 'Battery', 'dt_raw (ms)', 'dt_smooth (ms)',
              'real-world FPS']
    log_files = list(Path(save_dir).glob('frames*.txt'))
    for file_idx, log_file in enumerate(log_files):
        try:
            results = np.loadtxt(log_file, ndmin=2).T[:, 90:-30]  # clip first and last rows
            n_rows = results.shape[1]  # number of rows
            last = min(stop, n_rows) if stop else n_rows
            x = np.arange(start, last)
            results = results[:, x]
            t = (results[0] - results[0].min())  # set t0=0s
            results[0] = x
            for i, panel in enumerate(panels):
                if i >= len(results):
                    panel.remove()  # no data series for this panel
                    continue
                label = labels[file_idx] if len(labels) else log_file.stem.replace('frames_', '')
                panel.plot(t, results[i], marker='.', label=label, linewidth=1, markersize=5)
                panel.set_title(titles[i])
                panel.set_xlabel('time (s)')
                # if file_idx == len(log_files) - 1:
                #     panel.set_ylim(bottom=0)
                for side in ['top', 'right']:
                    panel.spines[side].set_visible(False)
        except Exception as e:
            print('Warning: Plotting error for %s; %s' % (log_file, e))
    panels[1].legend()
    plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200)
def plot_evolve(evolve_csv='path/to/evolve.csv'):  # from utils.plots import *; plot_evolve()
    # Plot evolve.csv hyp evolution results; the figure is saved beside the csv with a .png suffix
    evolve_csv = Path(evolve_csv)
    table = pd.read_csv(evolve_csv)
    names = [col.strip() for col in table.columns]
    values = table.values
    scores = fitness(values)
    best = np.argmax(scores)  # max fitness index
    plt.figure(figsize=(10, 12), tight_layout=True)
    matplotlib.rc('font', **{'size': 8})
    # Columns from index 7 onwards are plotted, one subplot each
    for i, name in enumerate(names[7:]):
        column = values[:, 7 + i]
        mu = column[best]  # best single result
        plt.subplot(6, 5, i + 1)
        plt.scatter(column, scores, c=hist2d(column, scores, 20), cmap='viridis', alpha=.8, edgecolors='none')
        plt.plot(mu, scores.max(), 'k+', markersize=15)
        plt.title('%s = %.3g' % (name, mu), fontdict={'size': 9})  # limit to 40 characters
        if i % 5 != 0:
            plt.yticks([])  # y ticks only on the first subplot of each row of five
        print('%15s: %.3g' % (name, mu))
    out_file = evolve_csv.with_suffix('.png')  # filename
    plt.savefig(out_file, dpi=200)
    plt.close()
    print(f'Saved {out_file}')
def plot_results(file='path/to/results.csv', dir=''):
    # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')
    # All results*.csv files in the directory are overlaid; the figure is saved as results.png there.
    save_dir = Path(file).parent if file else Path(dir)
    fig, grid = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True)
    panels = grid.ravel()
    csv_files = list(save_dir.glob('results*.csv'))
    assert len(csv_files), f'No results.csv files found in {save_dir.resolve()}, nothing to plot.'
    column_order = [1, 2, 3, 4, 5, 8, 9, 10, 6, 7]  # csv column shown in each panel, in panel order
    for csv_file in csv_files:
        try:
            table = pd.read_csv(csv_file)
            headers = [col.strip() for col in table.columns]
            x = table.values[:, 0]
            for panel, col in zip(panels, column_order):
                y = table.values[:, col]
                # y[y == 0] = np.nan  # don't show zero values
                panel.plot(x, y, marker='.', label=csv_file.stem, linewidth=2, markersize=8)
                panel.set_title(headers[col], fontsize=12)
                # if col in [8, 9, 10]:  # share train and val loss y axes
                #     panel.get_shared_y_axes().join(panel, panels[i - 5])
        except Exception as e:
            print(f'Warning: Plotting error for {csv_file}: {e}')
    panels[1].legend()
    fig.savefig(save_dir / 'results.png', dpi=200)
    plt.close()
def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
    """
    Save a grid image of the feature maps of the first batch element.

    x: Features to be visualized
    module_type: Module type
    stage: Module stage within model
    n: Maximum number of feature maps to plot
    save_dir: Directory to save results
    """
    if 'Detect' in module_type:
        return  # Detect modules are not visualised
    _, channels, height, width = x.shape  # batch, channels, height, width
    if not (height > 1 and width > 1):
        return  # nothing to draw for 1-pixel-wide/high maps

    filename = f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename
    blocks = torch.chunk(x[0].cpu(), channels, dim=0)  # select batch index 0, block by channels
    n = min(n, channels)  # number of plots
    _, grid = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # ceil(n / 8) rows x 8 cols
    cells = grid.ravel()
    plt.subplots_adjust(wspace=0.05, hspace=0.05)
    for cell, block in zip(cells[:n], blocks[:n]):
        cell.imshow(block.squeeze())  # cmap='gray'
        cell.axis('off')

    print(f'Saving {save_dir / filename}... ({n}/{channels})')
    plt.savefig(save_dir / filename, dpi=300, bbox_inches='tight')
    plt.close()
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
PyTorch utils
"""
import datetime
import logging
import math
import os
import platform
import subprocess
import time
from contextlib import contextmanager
from copy import deepcopy
from pathlib import Path
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
import torchvision
try:
import thop # for FLOPs computation
except ImportError:
thop = None
LOGGER = logging.getLogger(__name__)
@contextmanager
def torch_distributed_zero_first(local_rank: int):
    """
    Context manager to make all processes in distributed training wait for each local_master to do something.

    Ranks other than -1 and 0 enter a barrier before the with-body; rank 0 enters a barrier after it.
    """
    is_follower = local_rank != -1 and local_rank != 0
    if is_follower:
        dist.barrier(device_ids=[local_rank])
    yield
    if local_rank == 0:
        dist.barrier(device_ids=[0])
def date_modified(path=__file__):
    # return human-readable file modification date, i.e. '2021-3-26' (local time, no zero padding)
    mtime = Path(path).stat().st_mtime
    stamp = datetime.datetime.fromtimestamp(mtime)
    year, month, day = stamp.year, stamp.month, stamp.day
    return f'{year}-{month}-{day}'
def git_describe(path=Path(__file__).parent):  # path must be a directory
    # return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe
    # Returns '' when `path` is not inside a git repository or git cannot be executed.
    # The command is passed as an argument list (no shell), so directories containing spaces or shell
    # metacharacters are handled correctly.
    cmd = ['git', '-C', str(path), 'describe', '--tags', '--long', '--always']
    try:
        out = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except (subprocess.CalledProcessError, OSError):
        return ''  # not a git repository, or git is not installed
    return out.decode().strip()  # drop the trailing newline (and '\r' on Windows)
def select_device(device='', batch_size=None):
    # device = 'cpu' or '0' or '0,1,2,3'
    # Sets CUDA_VISIBLE_DEVICES accordingly, logs a summary line and returns torch.device('cuda:0') or 'cpu'
    banner = f'YOLOv5 🚀 {git_describe() or date_modified()} torch {torch.__version__} '  # string
    device = str(device).strip().lower().replace('cuda:', '')  # to string, 'cuda:0' to '0'
    use_cpu = device == 'cpu'
    if use_cpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force torch.cuda.is_available() = False
    elif device:  # non-cpu device requested
        os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
        assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested'  # check availability

    use_cuda = not use_cpu and torch.cuda.is_available()
    if not use_cuda:
        banner += 'CPU\n'
    else:
        device_ids = device.split(',') if device else '0'  # range(torch.cuda.device_count())  # i.e. 0,1,6,7
        count = len(device_ids)  # device count
        if count > 1 and batch_size:  # check batch_size is divisible by device_count
            assert batch_size % count == 0, f'batch-size {batch_size} not multiple of GPU count {count}'
        indent = ' ' * (len(banner) + 1)  # aligns the second and later device lines under the first
        for idx, dev_id in enumerate(device_ids):
            props = torch.cuda.get_device_properties(idx)
            prefix = '' if idx == 0 else indent
            banner += f"{prefix}CUDA:{dev_id} ({props.name}, {props.total_memory / 1024 ** 2}MB)\n"  # bytes to MB

    on_windows = platform.system() == 'Windows'
    LOGGER.info(banner.encode().decode('ascii', 'ignore') if on_windows else banner)  # emoji-safe
    return torch.device('cuda:0' if use_cuda else 'cpu')
def time_sync():
    # pytorch-accurate time: synchronise CUDA first (when available) so the timestamp is taken after that call returns
    cuda_present = torch.cuda.is_available()
    if cuda_present:
        torch.cuda.synchronize()
    now = time.time()
    return now
def profile(input, ops, n=10, device=None):
    # YOLOv5 speed/memory/FLOPs profiler
    #
    # Usage:
    #     input = torch.randn(16, 3, 640, 640)
    #     m1 = lambda x: x * torch.sigmoid(x)
    #     m2 = nn.SiLU()
    #     profile(input, [m1, m2], n=100)  # profile over 100 iterations
    #
    # input: a tensor or list of tensors; ops: a callable/module or list of them
    # n: number of timed iterations per (input, op) pair; device: target device, select_device() when falsy
    # Returns one entry per (input, op) pair: [params, GFLOPs, mem GB, forward ms, backward ms, in shape,
    # out shape], or None when that pair failed.
    results = []
    logging.basicConfig(format="%(message)s", level=logging.INFO)
    device = device or select_device()
    print(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}"
          f"{'input':>24s}{'output':>24s}")

    for x in input if isinstance(input, list) else [input]:
        x = x.to(device)
        x.requires_grad = True
        for m in ops if isinstance(ops, list) else [ops]:
            m = m.to(device) if hasattr(m, 'to') else m  # device
            m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m
            tf, tb, t = 0., 0., [0., 0., 0.]  # dt forward, backward
            try:
                flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2  # GFLOPs
            except Exception:  # thop missing (None) or op unsupported; never swallow KeyboardInterrupt/SystemExit
                flops = 0

            try:
                for _ in range(n):
                    t[0] = time_sync()
                    y = m(x)
                    t[1] = time_sync()
                    try:
                        _ = (sum([yi.sum() for yi in y]) if isinstance(y, list) else y).sum().backward()
                        t[2] = time_sync()
                    except Exception as e:  # no backward method
                        print(e)
                        t[2] = float('nan')
                    tf += (t[1] - t[0]) * 1000 / n  # ms per op forward
                    tb += (t[2] - t[1]) * 1000 / n  # ms per op backward
                mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0  # (GB)
                s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list'
                s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list'
                p = sum(w.numel() for w in m.parameters()) if isinstance(m, nn.Module) else 0  # parameters
                print(f'{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}')
                results.append([p, flops, mem, tf, tb, s_in, s_out])
            except Exception as e:
                print(e)
                results.append(None)  # keep one slot per (input, op) pair even on failure
            torch.cuda.empty_cache()
    return results
def is_parallel(model):
    # Returns True if model is of type DP or DDP (exact type match, subclasses are not counted)
    parallel_types = (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
    return type(model) in parallel_types
def de_parallel(model):
    # De-parallelize a model: returns single-GPU model if model is of type DP or DDP
    if is_parallel(model):
        return model.module  # the wrapped model
    return model
def intersect_dicts(da, db, exclude=()):
    # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
    # A key is omitted when any entry of `exclude` occurs as a substring of it.
    kept = {}
    for key, value in da.items():
        if key not in db:
            continue
        if any(pattern in key for pattern in exclude):
            continue
        if value.shape == db[key].shape:
            kept[key] = value
    return kept
def initialize_weights(model):
    # Adjust module hyper-parameters in place: BatchNorm2d eps/momentum and inplace=True for listed activations
    inplace_activations = [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]
    for module in model.modules():
        kind = type(module)  # exact type match, subclasses are left untouched
        if kind is nn.BatchNorm2d:
            module.eps = 1e-3
            module.momentum = 0.03
        elif kind in inplace_activations:
            module.inplace = True
        # nn.Conv2d is deliberately left as is
        # (nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') is disabled)
def find_modules(model, mclass=nn.Conv2d):
    # Finds layer indices matching module class 'mclass' within model.module_list
    matches = []
    for index, module in enumerate(model.module_list):
        if isinstance(module, mclass):
            matches.append(index)
    return matches
def sparsity(model):
    # Return global model sparsity: number of zero-valued parameter elements divided by all parameter elements
    params = list(model.parameters())
    total = sum((p.numel() for p in params), 0.)
    zeros = sum(((p == 0).sum() for p in params), 0.)  # float start value keeps the accumulation type unchanged
    return zeros / total
def prune(model, amount=0.3):
    # Prune model to requested global sparsity
    # Applies L1 unstructured pruning with `amount` to every nn.Conv2d weight, then makes it permanent.
    import torch.nn.utils.prune as prune
    print('Pruning model... ', end='')
    conv_layers = [module for _, module in model.named_modules() if isinstance(module, nn.Conv2d)]
    for layer in conv_layers:
        prune.l1_unstructured(layer, name='weight', amount=amount)  # prune
        prune.remove(layer, 'weight')  # make permanent
    print(' %.3g global sparsity' % sparsity(model))
def fuse_conv_and_bn(conv, bn):
    # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
    # Returns a new nn.Conv2d (requires_grad=False, on conv's device) whose weight and bias fold in the
    # batchnorm's affine transform computed from its running statistics.
    fusedconv = nn.Conv2d(conv.in_channels,
                          conv.out_channels,
                          kernel_size=conv.kernel_size,
                          stride=conv.stride,
                          padding=conv.padding,
                          dilation=conv.dilation,  # must be carried over, otherwise dilated convs change shape/output
                          groups=conv.groups,
                          bias=True).requires_grad_(False).to(conv.weight.device)

    # prepare filters: scale every output channel by gamma / sqrt(running_var + eps)
    w_conv = conv.weight.clone().view(conv.out_channels, -1)
    w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))

    # prepare spatial bias: a conv without bias contributes zeros
    b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
    b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)

    return fusedconv
def model_info(model, verbose=False, img_size=640):
    # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320]
    # Logs layer/parameter/gradient counts and, when thop can profile the model, a GFLOPs estimate.
    params = list(model.parameters())
    n_p = sum(p.numel() for p in params)  # number parameters
    n_g = sum(p.numel() for p in params if p.requires_grad)  # number gradients
    if verbose:
        header = ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')
        print('%5s %40s %9s %12s %20s %10s %10s' % header)
        for i, (name, p) in enumerate(model.named_parameters()):
            short_name = name.replace('module_list.', '')
            row = (i, short_name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())
            print('%5g %40s %9s %12g %20s %10.3g %10.3g' % row)

    try:  # FLOPs
        from thop import profile
        stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32
        device = next(model.parameters()).device
        img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=device)  # input
        flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2  # stride GFLOPs
        img_size = img_size if isinstance(img_size, list) else [img_size, img_size]  # expand if int/float
        # scale the stride-sized measurement up to the requested image size
        fs = ', %.1f GFLOPs' % (flops * img_size[0] / stride * img_size[1] / stride)  # 640x640 GFLOPs
    except Exception:  # includes ImportError when thop is not installed
        fs = ''

    n_layers = len(list(model.modules()))
    LOGGER.info(f"Model Summary: {n_layers} layers, {n_p} parameters, {n_g} gradients{fs}")
def load_classifier(name='resnet101', n=2):
    # Loads a pretrained model reshaped to n-class output
    # The final `fc` layer's weight and bias are replaced by zero-initialised parameters of the new size.
    builder = torchvision.models.__dict__[name]
    model = builder(pretrained=True)

    # ResNet model properties
    # input_size = [3, 224, 224]
    # input_space = 'RGB'
    # input_range = [0, 1]
    # mean = [0.485, 0.456, 0.406]
    # std = [0.229, 0.224, 0.225]

    # Reshape output to n classes
    head = model.fc
    in_features = head.weight.shape[1]
    head.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
    head.weight = nn.Parameter(torch.zeros(n, in_features), requires_grad=True)
    head.out_features = n
    return model
def scale_img(img, ratio=1.0, same_shape=False, gs=32):  # img(16,3,256,416)
    # scales img(bs,3,y,x) by ratio constrained to gs-multiple
    # With same_shape the result is padded back to the input size, otherwise up to the next gs-multiple.
    if ratio == 1.0:
        return img  # nothing to do; the input object itself is returned
    height, width = img.shape[2:]
    new_size = (int(height * ratio), int(width * ratio))  # new size
    resized = F.interpolate(img, size=new_size, mode='bilinear', align_corners=False)  # resize
    if same_shape:
        target_h, target_w = height, width
    else:  # pad/crop img
        target_h = math.ceil(height * ratio / gs) * gs
        target_w = math.ceil(width * ratio / gs) * gs
    pad_right = target_w - new_size[1]
    pad_bottom = target_h - new_size[0]
    return F.pad(resized, [0, pad_right, 0, pad_bottom], value=0.447)  # value = imagenet mean
def copy_attr(a, b, include=(), exclude=()):
    # Copy attributes from b to a, options to only include [...] and to exclude [...]
    # Attributes whose name starts with '_' are never copied.
    for key, value in b.__dict__.items():
        wanted = not len(include) or key in include  # an empty include list means "everything"
        if wanted and not key.startswith('_') and key not in exclude:
            setattr(a, key, value)
class EarlyStopping:
    # YOLOv5 simple early stopper
    # Call once per epoch with the current fitness; returns True once `patience` epochs passed without a new best.
    def __init__(self, patience=30):
        self.best_fitness = 0.0  # i.e. mAP
        self.best_epoch = 0
        # epochs to wait after fitness stops improving to stop; a falsy patience disables stopping
        self.patience = patience if patience else float('inf')
        self.possible_stop = False  # possible stop may occur next epoch

    def __call__(self, epoch, fitness):
        improved = fitness >= self.best_fitness  # >= 0 to allow for early zero-fitness stage of training
        if improved:
            self.best_epoch, self.best_fitness = epoch, fitness
        stalled = epoch - self.best_epoch  # epochs without improvement
        self.possible_stop = stalled >= (self.patience - 1)  # possible stop may occur next epoch
        stop = stalled >= self.patience  # stop training if patience exceeded
        if not stop:
            return stop
        LOGGER.info(f'EarlyStopping patience {self.patience} exceeded, stopping training.')
        return stop
class ModelEMA:
    """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
    Keep a moving average of everything in the model state_dict (parameters and buffers).
    This is intended to allow functionality like
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
    A smoothed version of the weights is necessary for some training schemes to perform well.
    This class is sensitive where it is initialized in the sequence of model init,
    GPU assignment and distributed training wrappers.
    """

    def __init__(self, model, decay=0.9999, updates=0):
        # Create EMA from a deep copy of the (unwrapped) model, put into eval mode
        source = model.module if is_parallel(model) else model
        self.ema = deepcopy(source).eval()  # FP32 EMA
        # if next(model.parameters()).device.type != 'cpu':
        #     self.ema.half()  # FP16 EMA
        self.updates = updates  # number of EMA updates
        # decay exponential ramp (to help early epochs): effective decay grows from 0 towards `decay`
        self.decay = lambda step: decay * (1 - math.exp(-step / 2000))
        for param in self.ema.parameters():
            param.requires_grad_(False)

    def update(self, model):
        # Update EMA parameters: ema = d * ema + (1 - d) * model for every floating-point state_dict entry
        with torch.no_grad():
            self.updates += 1
            d = self.decay(self.updates)

            source = model.module if is_parallel(model) else model
            msd = source.state_dict()  # model state_dict
            for key, ema_value in self.ema.state_dict().items():
                if not ema_value.dtype.is_floating_point:
                    continue  # non-floating-point entries are left unchanged
                ema_value.mul_(d).add_((1. - d) * msd[key].detach())

    def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
        # Update EMA attributes
        copy_attr(self.ema, model, include, exclude)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment